1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
3
; Test function if_then: a per-lane value (%src) is chosen by the
; .entry/.bb0/.merge triangle and is then used in two places: inside the
; conditional block .then, where it feeds a set.inactive -> update.dpp ->
; strict.wwm chain, and in .end through the %idx phi.
;
; Operands:
;   %input             - not referenced anywhere in the body.
;   %output            - buffer resource given to both raw.buffer.store calls.
;   %LocalInvocationId - only element 0 is read; it drives both branches.
;
; In the expected assembly below, %src is held in v3 (written with 0 in the
; entry block and 1 in %.bb0), copied into v1 at the top of %.then before the
; inverted-exec write of 0, and v3 is only rewritten with 0 after that copy.
; v3 is then used as the offset of the final store in %.end.
; NOTE(review): presumably this guards against the whole-wave code in .then
; clobbering the register that still carries %src -- confirm against the
; commit that introduced the test.
;
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4define amdgpu_cs void @if_then(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
5; GCN-LABEL: if_then:
6; GCN:       ; %bb.0: ; %.entry
7; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
8; GCN-NEXT:    v_mov_b32_e32 v3, 0
9; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
10; GCN-NEXT:  ; %bb.1: ; %.bb0
11; GCN-NEXT:    v_mov_b32_e32 v3, 1
12; GCN-NEXT:  ; %bb.2: ; %.merge
13; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
14; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
15; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
16; GCN-NEXT:    s_cbranch_execz .LBB0_4
17; GCN-NEXT:  ; %bb.3: ; %.then
18; GCN-NEXT:    v_mov_b32_e32 v1, v3
19; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
20; GCN-NEXT:    v_mov_b32_e32 v1, 0
21; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
22; GCN-NEXT:    s_or_saveexec_b32 s1, -1
23; GCN-NEXT:    v_mov_b32_e32 v2, 0
24; GCN-NEXT:    v_mov_b32_dpp v2, v1 row_shr:1 row_mask:0xf bank_mask:0xf
25; GCN-NEXT:    s_mov_b32 exec_lo, s1
26; GCN-NEXT:    v_mov_b32_e32 v0, v2
27; GCN-NEXT:    v_mov_b32_e32 v4, -1
28; GCN-NEXT:    v_mov_b32_e32 v3, 0
29; GCN-NEXT:    buffer_store_dword v4, v0, s[4:7], 0 offen
30; GCN-NEXT:  .LBB0_4: ; %.end
31; GCN-NEXT:    s_waitcnt_depctr 0xffe3
32; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
33; GCN-NEXT:    v_mov_b32_e32 v0, -1
34; GCN-NEXT:    buffer_store_dword v0, v3, s[4:7], 0 offen
35; GCN-NEXT:    s_endpgm
36.entry:
  ; Read component 0 of the invocation id; lanes where it is 0 skip .bb0.
37  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
38  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
39  br i1 %.not10002, label %.merge, label %.bb0
40
41.bb0:
  ; Empty block: it exists only to give the %src phi a second predecessor.
42  br label %.merge
43
44.merge:
  ; %src is 0 when arriving straight from .entry and 1 when arriving via .bb0.
45  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  ; Lanes whose component 0 is below 4 (unsigned) go directly to .end;
  ; the remaining lanes run .then first.
46  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
47  br i1 %i530, label %.end, label %.then
48
49.then:
  ; set.inactive(%src, 0) -> update.dpp -> strict.wwm. The dpp control value
  ; 273 with masks 15/15 shows up in the expected assembly as
  ; "row_shr:1 row_mask:0xf bank_mask:0xf".
50  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
51  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
52  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  ; Store -1 to %output, with the strict.wwm result as the third (offset) operand.
53  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
54  br label %.end
55
56.end:
  ; %idx is 0 for lanes that went through .then and %src for lanes that came
  ; directly from .merge, so %src stays live across .then.
57  %idx = phi i32 [ 0, %.then ], [ %src, %.merge ]
58  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %idx, i32 0, i32 0)
59  ret void
60}
61
62
; Test function if_else_vgpr_opt: same %src selection as a triangle
; (.entry/.bb0/.merge), followed by a two-sided branch. The .then side stores
; using %src directly; the .else side runs %src through a set.inactive ->
; update.dpp -> strict.wwm chain and stores using that result. Nothing is
; carried into .end, which only returns.
;
; Operands:
;   %input             - not referenced anywhere in the body.
;   %output            - buffer resource given to both raw.buffer.store calls.
;   %LocalInvocationId - only element 0 is read; it drives both branches.
;
; In the expected assembly below, %src is held in v3. In %.else, v1 is first
; written with 0 between an s_or_saveexec_b32 ..., -1 and the matching exec
; restore, %src is copied from v3 into v2 before the inverted-exec write of 0,
; and v3 is reused for the store data (-1) only after that copy. %.then still
; reads v3 as the offset of its store.
; NOTE(review): the function name suggests this covers a VGPR live-range
; optimization on if/else regions -- confirm against the commit that
; introduced the test.
;
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
63define amdgpu_cs void @if_else_vgpr_opt(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
64; GCN-LABEL: if_else_vgpr_opt:
65; GCN:       ; %bb.0: ; %.entry
66; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
67; GCN-NEXT:    v_mov_b32_e32 v3, 0
68; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
69; GCN-NEXT:  ; %bb.1: ; %.bb0
70; GCN-NEXT:    v_mov_b32_e32 v3, 1
71; GCN-NEXT:  ; %bb.2: ; %.merge
72; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
73; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
74; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
75; GCN-NEXT:    s_xor_b32 s0, exec_lo, s0
76; GCN-NEXT:    s_cbranch_execnz .LBB1_5
77; GCN-NEXT:  ; %bb.3: ; %Flow
78; GCN-NEXT:    s_andn2_saveexec_b32 s0, s0
79; GCN-NEXT:    s_cbranch_execnz .LBB1_6
80; GCN-NEXT:  .LBB1_4: ; %.end
81; GCN-NEXT:    s_endpgm
82; GCN-NEXT:  .LBB1_5: ; %.else
83; GCN-NEXT:    s_or_saveexec_b32 s1, -1
84; GCN-NEXT:    v_mov_b32_e32 v1, 0
85; GCN-NEXT:    s_mov_b32 exec_lo, s1
86; GCN-NEXT:    v_mov_b32_e32 v2, v3
87; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
88; GCN-NEXT:    v_mov_b32_e32 v2, 0
89; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
90; GCN-NEXT:    s_or_saveexec_b32 s1, -1
91; GCN-NEXT:    v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf
92; GCN-NEXT:    s_mov_b32 exec_lo, s1
93; GCN-NEXT:    v_mov_b32_e32 v0, v1
94; GCN-NEXT:    v_mov_b32_e32 v3, -1
95; GCN-NEXT:    buffer_store_dword v3, v0, s[4:7], 0 offen
96; GCN-NEXT:    ; implicit-def: $vgpr3
97; GCN-NEXT:    s_andn2_saveexec_b32 s0, s0
98; GCN-NEXT:    s_cbranch_execz .LBB1_4
99; GCN-NEXT:  .LBB1_6: ; %.then
100; GCN-NEXT:    v_mov_b32_e32 v0, -1
101; GCN-NEXT:    buffer_store_dword v0, v3, s[4:7], 0 offen
102; GCN-NEXT:    s_endpgm
103.entry:
  ; Read component 0 of the invocation id; lanes where it is 0 skip .bb0.
104  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
105  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
106  br i1 %.not10002, label %.merge, label %.bb0
107
108.bb0:
  ; Empty block: it exists only to give the %src phi a second predecessor.
109  br label %.merge
110
111.merge:
  ; %src is 0 when arriving straight from .entry and 1 when arriving via .bb0.
112  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  ; Lanes whose component 0 is below 4 (unsigned) take .then, the rest .else.
113  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
114  br i1 %i530, label %.then, label %.else
115
116.then:
  ; Plain use of %src: store -1 to %output with %src as the third (offset) operand.
117  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %src, i32 0, i32 0)
118  br label %.end
119
120.else:
  ; set.inactive(%src, 0) -> update.dpp -> strict.wwm. The dpp control value
  ; 273 with masks 15/15 shows up in the expected assembly as
  ; "row_shr:1 row_mask:0xf bank_mask:0xf".
121  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
122  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
123  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  ; Store -1 to %output, with the strict.wwm result as the third (offset) operand.
124  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
125  br label %.end
126
127.end:
  ; Join point for both sides; no value is merged here.
128  ret void
129}
130
; Declarations of the AMDGPU intrinsics called by the test functions in this
; file. Each declaration refers to one of the attribute groups defined below.
131declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
132declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
133declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
134declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32 immarg, i32 immarg) #2
135
; Attribute groups:
;   #0 - set.inactive and update.dpp (convergent, readnone).
;   #1 - strict.wwm; same as #0 plus speculatable.
;   #2 - raw.buffer.store (writeonly; not marked convergent).
136attributes #0 = { convergent nounwind readnone willreturn }
137attributes #1 = { convergent nounwind readnone speculatable willreturn }
138attributes #2 = { nounwind willreturn writeonly }
139
140