1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefix=SI %s
3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=VI %s
4; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s
5
; select0: stores (%cond >u 5 ? 0 : %in) to %out as a full 64-bit value.
; In the expected output the compare appears in inverted form
; (s_cmp_lt_u32 ..., 6) for all three targets. The tahiti output selects each
; 32-bit half separately with v_cndmask_b32, while the tonga and gfx90a
; output performs one scalar 64-bit select with s_cselect_b64.
6define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
7; SI-LABEL: select0:
8; SI:       ; %bb.0: ; %entry
9; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
10; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
11; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
12; SI-NEXT:    s_mov_b32 s7, 0xf000
13; SI-NEXT:    s_mov_b32 s6, -1
14; SI-NEXT:    s_waitcnt lgkmcnt(0)
15; SI-NEXT:    s_cmp_lt_u32 s2, 6
16; SI-NEXT:    v_mov_b32_e32 v0, s1
17; SI-NEXT:    s_cselect_b64 vcc, -1, 0
18; SI-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
19; SI-NEXT:    v_mov_b32_e32 v0, s0
20; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
21; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
22; SI-NEXT:    s_endpgm
23;
24; VI-LABEL: select0:
25; VI:       ; %bb.0: ; %entry
26; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
27; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
28; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
29; VI-NEXT:    s_waitcnt lgkmcnt(0)
30; VI-NEXT:    v_mov_b32_e32 v0, s2
31; VI-NEXT:    s_cmp_lt_u32 s4, 6
32; VI-NEXT:    s_cselect_b64 s[0:1], s[0:1], 0
33; VI-NEXT:    v_mov_b32_e32 v3, s1
34; VI-NEXT:    v_mov_b32_e32 v1, s3
35; VI-NEXT:    v_mov_b32_e32 v2, s0
36; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
37; VI-NEXT:    s_endpgm
38;
39; GFX90A-LABEL: select0:
40; GFX90A:       ; %bb.0: ; %entry
41; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
42; GFX90A-NEXT:    s_load_dword s6, s[0:1], 0x2c
43; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
44; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
45; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
46; GFX90A-NEXT:    s_cmp_lt_u32 s6, 6
47; GFX90A-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
48; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
49; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
50; GFX90A-NEXT:    s_endpgm
; The block label below is echoed in the "%bb.0" assertion lines ("%entry"),
; so renaming it would require regenerating the assertions.
51entry:
52  %0 = icmp ugt i32 %cond, 5
; The true arm is the constant 0; the false arm is the kernel argument %in.
53  %1 = select i1 %0, i64 0, i64 %in
54  store i64 %1, i64 addrspace(1)* %out
55  ret void
56}
57
; select_trunc_i64: computes (%cond >u 5 ? 0 : %in) as an i64, truncates the
; result to i32 and stores it to %out.
; The expected output for every target loads only a single dword of %in
; (s_load_dword at 0xd / 0x34) and uses a 32-bit select (v_cndmask_b32 or
; s_cselect_b32), i.e. the 64-bit select is narrowed to its low half.
58define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
59; SI-LABEL: select_trunc_i64:
60; SI:       ; %bb.0:
61; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
62; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
63; SI-NEXT:    s_load_dword s0, s[0:1], 0xd
64; SI-NEXT:    s_mov_b32 s7, 0xf000
65; SI-NEXT:    s_mov_b32 s6, -1
66; SI-NEXT:    s_waitcnt lgkmcnt(0)
67; SI-NEXT:    s_cmp_lt_u32 s2, 6
68; SI-NEXT:    v_mov_b32_e32 v0, s0
69; SI-NEXT:    s_cselect_b64 vcc, -1, 0
70; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
71; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
72; SI-NEXT:    s_endpgm
73;
74; VI-LABEL: select_trunc_i64:
75; VI:       ; %bb.0:
76; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
77; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
78; VI-NEXT:    s_load_dword s0, s[0:1], 0x34
79; VI-NEXT:    s_waitcnt lgkmcnt(0)
80; VI-NEXT:    v_mov_b32_e32 v0, s2
81; VI-NEXT:    s_cmp_lt_u32 s4, 6
82; VI-NEXT:    s_cselect_b32 s0, s0, 0
83; VI-NEXT:    v_mov_b32_e32 v1, s3
84; VI-NEXT:    v_mov_b32_e32 v2, s0
85; VI-NEXT:    flat_store_dword v[0:1], v2
86; VI-NEXT:    s_endpgm
87;
88; GFX90A-LABEL: select_trunc_i64:
89; GFX90A:       ; %bb.0:
90; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
91; GFX90A-NEXT:    s_load_dword s4, s[0:1], 0x2c
92; GFX90A-NEXT:    s_load_dword s5, s[0:1], 0x34
93; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
94; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
95; GFX90A-NEXT:    s_cmp_lt_u32 s4, 6
96; GFX90A-NEXT:    s_cselect_b32 s0, s5, 0
97; GFX90A-NEXT:    v_mov_b32_e32 v1, s0
98; GFX90A-NEXT:    global_store_dword v0, v1, s[2:3]
99; GFX90A-NEXT:    s_endpgm
100  %cmp = icmp ugt i32 %cond, 5
101  %sel = select i1 %cmp, i64 0, i64 %in
; Only the low 32 bits of the selected value are stored.
102  %trunc = trunc i64 %sel to i32
103  store i32 %trunc, i32 addrspace(1)* %out, align 4
104  ret void
105}
106
; select_trunc_i64_2: computes (%cond >u 5 ? %a : %b) on two i64 kernel
; arguments, truncates the result to i32 and stores it to %out.
; The expected output loads both arguments with one s_load_dwordx4 and then
; selects between their low dwords only (registers 0 and 2 of the loaded
; quad) using a 32-bit select.
107define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
108; SI-LABEL: select_trunc_i64_2:
109; SI:       ; %bb.0:
110; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
111; SI-NEXT:    s_load_dword s8, s[0:1], 0xb
112; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
113; SI-NEXT:    s_mov_b32 s7, 0xf000
114; SI-NEXT:    s_mov_b32 s6, -1
115; SI-NEXT:    s_waitcnt lgkmcnt(0)
116; SI-NEXT:    s_cmp_gt_u32 s8, 5
117; SI-NEXT:    v_mov_b32_e32 v0, s2
118; SI-NEXT:    v_mov_b32_e32 v1, s0
119; SI-NEXT:    s_cselect_b64 vcc, -1, 0
120; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
121; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
122; SI-NEXT:    s_endpgm
123;
124; VI-LABEL: select_trunc_i64_2:
125; VI:       ; %bb.0:
126; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
127; VI-NEXT:    s_load_dword s6, s[0:1], 0x2c
128; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
129; VI-NEXT:    s_waitcnt lgkmcnt(0)
130; VI-NEXT:    v_mov_b32_e32 v0, s4
131; VI-NEXT:    s_cmp_gt_u32 s6, 5
132; VI-NEXT:    s_cselect_b32 s0, s0, s2
133; VI-NEXT:    v_mov_b32_e32 v1, s5
134; VI-NEXT:    v_mov_b32_e32 v2, s0
135; VI-NEXT:    flat_store_dword v[0:1], v2
136; VI-NEXT:    s_endpgm
137;
138; GFX90A-LABEL: select_trunc_i64_2:
139; GFX90A:       ; %bb.0:
140; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
141; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x2c
142; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
143; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
144; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
145; GFX90A-NEXT:    s_cmp_gt_u32 s8, 5
146; GFX90A-NEXT:    s_cselect_b32 s0, s4, s6
147; GFX90A-NEXT:    v_mov_b32_e32 v1, s0
148; GFX90A-NEXT:    global_store_dword v0, v1, s[2:3]
149; GFX90A-NEXT:    s_endpgm
150  %cmp = icmp ugt i32 %cond, 5
; Neither arm is a constant here, unlike @select_trunc_i64 where one arm is 0.
151  %sel = select i1 %cmp, i64 %a, i64 %b
152  %trunc = trunc i64 %sel to i32
153  store i32 %trunc, i32 addrspace(1)* %out, align 4
154  ret void
155}
156
; v_select_trunc_i64_2: like @select_trunc_i64_2, but the two i64 operands
; are loaded from memory through %aptr and %bptr instead of being passed as
; kernel arguments.
; The expected output reads only one dword through each pointer
; (s_load_dword ..., 0x0) and selects with a 32-bit operation, so both the
; loads and the select are narrowed to the low half.
; NOTE(review): the "v_" name prefix presumably refers to vector-register
; operands; the current assertions show scalar loads on all three targets.
157define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
158; SI-LABEL: v_select_trunc_i64_2:
159; SI:       ; %bb.0:
160; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
161; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
162; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
163; SI-NEXT:    s_mov_b32 s7, 0xf000
164; SI-NEXT:    s_mov_b32 s6, -1
165; SI-NEXT:    s_waitcnt lgkmcnt(0)
166; SI-NEXT:    s_load_dword s1, s[8:9], 0x0
167; SI-NEXT:    s_load_dword s2, s[10:11], 0x0
168; SI-NEXT:    s_cmp_gt_u32 s0, 5
169; SI-NEXT:    s_cselect_b64 vcc, -1, 0
170; SI-NEXT:    s_waitcnt lgkmcnt(0)
171; SI-NEXT:    v_mov_b32_e32 v1, s1
172; SI-NEXT:    v_mov_b32_e32 v0, s2
173; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
174; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
175; SI-NEXT:    s_endpgm
176;
177; VI-LABEL: v_select_trunc_i64_2:
178; VI:       ; %bb.0:
179; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
180; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
181; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
182; VI-NEXT:    s_waitcnt lgkmcnt(0)
183; VI-NEXT:    s_load_dword s1, s[4:5], 0x0
184; VI-NEXT:    s_load_dword s4, s[6:7], 0x0
185; VI-NEXT:    v_mov_b32_e32 v0, s2
186; VI-NEXT:    s_cmp_gt_u32 s0, 5
187; VI-NEXT:    v_mov_b32_e32 v1, s3
188; VI-NEXT:    s_waitcnt lgkmcnt(0)
189; VI-NEXT:    s_cselect_b32 s0, s1, s4
190; VI-NEXT:    v_mov_b32_e32 v2, s0
191; VI-NEXT:    flat_store_dword v[0:1], v2
192; VI-NEXT:    s_endpgm
193;
194; GFX90A-LABEL: v_select_trunc_i64_2:
195; GFX90A:       ; %bb.0:
196; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
197; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
198; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x2c
199; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
200; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
201; GFX90A-NEXT:    s_load_dword s0, s[4:5], 0x0
202; GFX90A-NEXT:    s_load_dword s1, s[6:7], 0x0
203; GFX90A-NEXT:    s_cmp_gt_u32 s8, 5
204; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
205; GFX90A-NEXT:    s_cselect_b32 s0, s0, s1
206; GFX90A-NEXT:    v_mov_b32_e32 v1, s0
207; GFX90A-NEXT:    global_store_dword v0, v1, s[2:3]
208; GFX90A-NEXT:    s_endpgm
209  %cmp = icmp ugt i32 %cond, 5
; Both operands are written as full 8-byte loads in the IR.
210  %a = load i64, i64 addrspace(1)* %aptr, align 8
211  %b = load i64, i64 addrspace(1)* %bptr, align 8
212  %sel = select i1 %cmp, i64 %a, i64 %b
213  %trunc = trunc i64 %sel to i32
214  store i32 %trunc, i32 addrspace(1)* %out, align 4
215  ret void
216}
217
; v_select_i64_split_imm: stores (%cond >u 5 ? %a : 63 << 32) to %out as an
; i64, where %a is loaded through %aptr. The constant has a zero low half and
; 63 in its high half.
; The tahiti output selects each half with v_cndmask_b32, using the inline
; constants 0 and 63 directly. The tonga and gfx90a output first builds the
; constant in an SGPR pair with two s_mov_b32 (0 and 63) and then performs a
; single s_cselect_b64.
218define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
219; SI-LABEL: v_select_i64_split_imm:
220; SI:       ; %bb.0:
221; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
222; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
223; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
224; SI-NEXT:    s_mov_b32 s7, 0xf000
225; SI-NEXT:    s_mov_b32 s6, -1
226; SI-NEXT:    s_waitcnt lgkmcnt(0)
227; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
228; SI-NEXT:    s_cmp_gt_u32 s2, 5
229; SI-NEXT:    s_cselect_b64 vcc, -1, 0
230; SI-NEXT:    s_waitcnt lgkmcnt(0)
231; SI-NEXT:    v_mov_b32_e32 v0, s1
232; SI-NEXT:    v_mov_b32_e32 v2, s0
233; SI-NEXT:    v_cndmask_b32_e32 v1, 63, v0, vcc
234; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
235; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
236; SI-NEXT:    s_endpgm
237;
238; VI-LABEL: v_select_i64_split_imm:
239; VI:       ; %bb.0:
240; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
241; VI-NEXT:    s_load_dword s6, s[0:1], 0x2c
242; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
243; VI-NEXT:    s_mov_b32 s4, 0
244; VI-NEXT:    s_mov_b32 s5, 63
245; VI-NEXT:    s_waitcnt lgkmcnt(0)
246; VI-NEXT:    v_mov_b32_e32 v0, s2
247; VI-NEXT:    s_cmp_gt_u32 s6, 5
248; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
249; VI-NEXT:    v_mov_b32_e32 v1, s3
250; VI-NEXT:    s_waitcnt lgkmcnt(0)
251; VI-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
252; VI-NEXT:    v_mov_b32_e32 v3, s1
253; VI-NEXT:    v_mov_b32_e32 v2, s0
254; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
255; VI-NEXT:    s_endpgm
256;
257; GFX90A-LABEL: v_select_i64_split_imm:
258; GFX90A:       ; %bb.0:
259; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
260; GFX90A-NEXT:    s_load_dword s6, s[0:1], 0x2c
261; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
262; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
263; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
265; GFX90A-NEXT:    s_mov_b32 s4, 0
266; GFX90A-NEXT:    s_cmp_gt_u32 s6, 5
267; GFX90A-NEXT:    s_mov_b32 s5, 63
268; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
269; GFX90A-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
270; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
271; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
272; GFX90A-NEXT:    s_endpgm
273  %cmp = icmp ugt i32 %cond, 5
274  %a = load i64, i64 addrspace(1)* %aptr, align 8
; NOTE(review): %b is never used after this load, and none of the assertions
; above contain a load through %bptr (only the %aptr kernel argument is
; fetched). It looks like a leftover from @v_select_trunc_i64_2; confirm
; before removing, and regenerate the assertions if the IR is changed.
275  %b = load i64, i64 addrspace(1)* %bptr, align 8
276  %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
277  store i64 %sel, i64 addrspace(1)* %out, align 8
278  ret void
279}
280