; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; (sext i1 (a == b)) == 0 should fold to a single a != b compare; no
; second v_cmp should appear for the outer icmp.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (a != b)) != 0 should fold to a single a != b compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (a == b)) == -1 should fold to a single a == b compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (sext i1 (a != b)) != -1 should fold to a single a == b compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (a == b)) == 0 should fold to a single a != b compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (a != b)) != 0 should fold to a single a != b compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (a == b)) == 1 should fold to a single a == b compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; (zext i1 (a != b)) != 1 should fold to a single a == b compare.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Reduces to false: a zero-extended i1 is 0 or 1 and can never be -1.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Reduces to true: a zero-extended i1 is 0 or 1 and can never be -1.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Compare of a zext'd i8 against 255 (u8 max); the kernel argument is
; masked to 8 bits and compared against the 0xff constant.
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
; SI: s_cmp_lg_u32 [[B]], [[K255]]
; SI: s_cselect_b64 [[CC:[^,]+]], -1, 0

; VI: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
; VI: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; VI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A sign-extending byte load compared against -1 should compare the
; loaded value directly, without re-extending or masking.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A signext i8 argument compared against -1 should compare the incoming
; register directly; the sext in the IR is redundant with the ABI.
; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte, and and compare to
; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
; Should do a buffer_load_sbyte and compare with -1

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
; GCN: s_cmp_lg_u32 [[B]], [[K]]{{$}}
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Reduces to true: a zero-extended i8 is at most 255 and can never be -1.
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Reduces to true: a zero-extended i1 is 0 or 1 and can never equal 2.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Reduces to false: a zero-extended i1 is 0 or 1 and can never equal 2.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able fold to true/false in
; DAGCombiner

; This really folds away to false: a sign-extended i1 is 0 or -1, never 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Folds to true: a sign-extended i1 is 0 or -1, never 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Folds to true: a sign-extended i1 is 0 or -1, never 2.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}
