1; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL                %s
2
3; RUN: llc -march=amdgcn -mcpu=verde   -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI    %s
4; RUN: llc -march=amdgcn -mcpu=fiji    -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI      %s
5; RUN: llc -march=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9    %s
6; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1010,GFX10W32 %s
7; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1030,GFX10W32 %s
8; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX1030,GFX10W64 %s
9; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11 %s
10
11; GCN-ISEL-LABEL: name:   sadd64rr
12; GCN-ISEL-LABEL: body:
13; GCN-ISEL-LABEL: bb.0.entry:
14; GCN-ISEL: S_ADD_U64_PSEUDO
15
16; GCN-LABEL: @sadd64rr
17; GCN:       s_add_u32
18; GCN:       s_addc_u32
; 64-bit add, register + register: adds the two i64 kernel arguments %a and %b
; and stores the sum through %out. Neither operand is derived from the
; workitem id. The checks before this function expect S_ADD_U64_PSEUDO after
; isel and an s_add_u32 / s_addc_u32 pair in the final code.
19define amdgpu_kernel void @sadd64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
20entry:
21  %add = add i64 %a, %b
22  store i64 %add, i64 addrspace(1)* %out
23  ret void
24}
25
26; GCN-ISEL-LABEL: name:   sadd64ri
27; GCN-ISEL-LABEL: body:
28; GCN-ISEL-LABEL: bb.0.entry:
29; GCN-ISEL: S_ADD_U64_PSEUDO
30
31; GCN-LABEL: @sadd64ri
32; GCN:       s_add_u32  s{{[0-9]+}}, s{{[0-9]+}}, 0x56789876
33; GCN:       s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1234
; 64-bit add, register + immediate: adds a 64-bit constant to the i64 kernel
; argument %a and stores the sum through %out. The checks before this function
; expect the constant to be split into its two 32-bit halves, used as the
; immediates of s_add_u32 and s_addc_u32 respectively.
34define amdgpu_kernel void @sadd64ri(i64 addrspace(1)* %out, i64 %a) {
35entry:
  ; 20015998343286 == 0x0000123456789876 (high half 0x1234, low half 0x56789876).
36  %add = add i64 20015998343286, %a
37  store i64 %add, i64 addrspace(1)* %out
38  ret void
39}
40
41; GCN-ISEL-LABEL: name:   vadd64rr
42; GCN-ISEL-LABEL: body:
43; GCN-ISEL-LABEL: bb.0.entry:
44; GCN-ISEL: V_ADD_U64_PSEUDO
45
46; GCN-LABEL: @vadd64rr
47;
48; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
49; CISI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
50;
51; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
52; VI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
53;
54; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
55; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
56;
57; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
58; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v{{[0-9]+}}
59; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
60; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
61;
62; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
63; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
; 64-bit add where one operand comes from the workitem id: adds the i64 kernel
; argument %a to the sign-extended result of llvm.amdgcn.workitem.id.x and
; stores the sum through %out. The checks before this function expect
; V_ADD_U64_PSEUDO after isel and a per-target VALU add / add-with-carry pair
; in the final code.
64define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
65entry:
66  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Widen the 32-bit workitem id to 64 bits (signed) so it can be added to %a.
67  %tid.ext = sext i32 %tid to i64
68  %add = add i64 %a, %tid.ext
69  store i64 %add, i64 addrspace(1)* %out
70  ret void
71}
72
73; GCN-ISEL-LABEL: name:   vadd64ri
74; GCN-ISEL-LABEL: body:
75; GCN-ISEL-LABEL: bb.0.entry:
76; GCN-ISEL: V_ADD_U64_PSEUDO
77
78; GCN-LABEL: @vadd64ri
79;
80; CISI:	v_add_i32_e32 v0, vcc, 0x56789876, v0
81; CISI:	v_mov_b32_e32 v1, 0x1234
82; CISI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
83;
84; VI: v_add_u32_e32 v0, vcc, 0x56789876, v0
85; VI: v_mov_b32_e32 v1, 0x1234
86; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
87;
88; GFX9:	v_add_co_u32_e32 v0, vcc, 0x56789876, v0
89; GFX9: v_mov_b32_e32 v1, 0x1234
90; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
91;
92; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
93; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], 0x56789876, v{{[0-9]+}}
94; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
95; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]]
96;
97; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
98; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]]
; 64-bit add of a constant and a value derived from the workitem id: adds a
; 64-bit constant to the sign-extended result of llvm.amdgcn.workitem.id.x and
; stores the sum through %out. The checks before this function expect
; V_ADD_U64_PSEUDO after isel, with the constant's low half (0x56789876) in the
; add and its high half (0x1234) feeding the add-with-carry.
99define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
100entry:
101  %tid = call i32 @llvm.amdgcn.workitem.id.x()
102  %tid.ext = sext i32 %tid to i64
  ; 20015998343286 == 0x0000123456789876 (high half 0x1234, low half 0x56789876).
103  %add = add i64 20015998343286, %tid.ext
104  store i64 %add, i64 addrspace(1)* %out
105  ret void
106}
107
108; GCN-ISEL-LABEL: name:   suaddo32
109; GCN-ISEL-LABEL: body:
110; GCN-ISEL-LABEL: bb.0
111; GCN-ISEL: S_ADD_I32
; 32-bit unsigned add-with-overflow whose overflow bit is dead: calls
; llvm.uadd.with.overflow.i32 on the kernel arguments %a and %b and stores only
; the sum (struct element 0) through %out. %carry is extracted but never used,
; and the %carryout pointer argument is not written. The isel check before this
; function expects S_ADD_I32; there are no final-assembly checks for it.
112define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
113  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
114  %val = extractvalue { i32, i1 } %uadd, 0
  ; Overflow flag: extracted here but intentionally left without any user.
115  %carry = extractvalue { i32, i1 } %uadd, 1
116  store i32 %val, i32 addrspace(1)* %out, align 4
117  ret void
118}
119
120
121; GCN-ISEL-LABEL: name:   uaddo32_vcc_user
122; GCN-ISEL-LABEL: body:
123; GCN-ISEL-LABEL: bb.0
124; GCN-ISEL: V_ADD_CO_U32_e64
125
126; Below we check selection to v_add/addc
127; because the only user of VCC produced by the UADDO is v_cndmask.
128; We select to VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.
129
130; GCN-LABEL: @uaddo32_vcc_user
131;
132; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
133; CISI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
134;
135; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
136; VI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
137;
138; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
139; GFX9:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
140;
141; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
142; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, s{{[0-9]+}}
143; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
144;
145; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
146; GFX11: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
; 32-bit unsigned add-with-overflow where the overflow bit is live: calls
; llvm.uadd.with.overflow.i32 on the kernel arguments %a and %b, stores the sum
; (struct element 0) through %out and the overflow flag (struct element 1)
; through %carryout. The checks before this function expect V_ADD_CO_U32_e64
; after isel and a VALU add followed by v_cndmask_b32 consuming the carry.
147define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
148  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
149  %val = extractvalue { i32, i1 } %uadd, 0
150  %carry = extractvalue { i32, i1 } %uadd, 1
151  store i32 %val, i32 addrspace(1)* %out, align 4
  ; Storing the i1 overflow flag is the only use of the carry in this kernel.
152  store i1 %carry, i1 addrspace(1)* %carryout
153  ret void
154}
155
156; GCN-ISEL-LABEL: name:   suaddo64
157; GCN-ISEL-LABEL: body:
158; GCN-ISEL-LABEL: bb.0
159; GCN-ISEL: S_ADD_U64_PSEUDO
160
161; GCN-LABEL: @suaddo64
162;
163; GCN: s_add_u32
164; GCN: s_addc_u32
; 64-bit unsigned add-with-overflow on two kernel arguments: calls
; llvm.uadd.with.overflow.i64 on %a and %b, stores the sum (struct element 0)
; through %out and the overflow flag (struct element 1) through %carryout.
; The checks before this function expect S_ADD_U64_PSEUDO after isel and an
; s_add_u32 / s_addc_u32 pair in the final code.
165define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
166  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
167  %val = extractvalue { i64, i1 } %uadd, 0
168  %carry = extractvalue { i64, i1 } %uadd, 1
169  store i64 %val, i64 addrspace(1)* %out, align 8
170  store i1 %carry, i1 addrspace(1)* %carryout
171  ret void
172}
173
174; GCN-ISEL-LABEL: name:   vuaddo64
175; GCN-ISEL-LABEL: body:
176; GCN-ISEL-LABEL: bb.0
177; GCN-ISEL: V_ADD_U64_PSEUDO
178
179; GCN-LABEL: @vuaddo64
180;
181; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
182; CISI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
183;
184; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
185; VI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
186;
187; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
188; GFX9:	v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
189;
190; GFX10W32: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
191; GFX10W64: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
192; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
193; GFX1030: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
194;
195; GFX11: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
196; GFX11: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
; 64-bit unsigned add-with-overflow where one operand comes from the workitem
; id: calls llvm.uadd.with.overflow.i64 on the kernel argument %a and the
; sign-extended result of llvm.amdgcn.workitem.id.x, then stores the sum
; through %out and the overflow flag through %carryout. The checks before this
; function expect V_ADD_U64_PSEUDO after isel and a per-target VALU
; add / add-with-carry pair whose first add reads v0.
197define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
198  %tid = call i32 @llvm.amdgcn.workitem.id.x()
199  %tid.ext = sext i32 %tid to i64
200  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %tid.ext)
201  %val = extractvalue { i64, i1 } %uadd, 0
202  %carry = extractvalue { i64, i1 } %uadd, 1
203  store i64 %val, i64 addrspace(1)* %out, align 8
204  store i1 %carry, i1 addrspace(1)* %carryout
205  ret void
206}
207
208; GCN-ISEL-LABEL: name:   ssub64rr
209; GCN-ISEL-LABEL: body:
210; GCN-ISEL-LABEL: bb.0.entry:
211; GCN-ISEL: S_SUB_U64_PSEUDO
212
213; GCN-LABEL: @ssub64rr
214; GCN:       s_sub_u32
215; GCN:       s_subb_u32
; 64-bit subtract, register - register: computes %a - %b on the two i64 kernel
; arguments and stores the difference through %out. Neither operand is derived
; from the workitem id. The checks before this function expect
; S_SUB_U64_PSEUDO after isel and an s_sub_u32 / s_subb_u32 pair in the final
; code.
216define amdgpu_kernel void @ssub64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
217entry:
218  %sub = sub i64 %a, %b
219  store i64 %sub, i64 addrspace(1)* %out
220  ret void
221}
222
223; GCN-ISEL-LABEL: name:   ssub64ri
224; GCN-ISEL-LABEL: body:
225; GCN-ISEL-LABEL: bb.0.entry:
226; GCN-ISEL: S_SUB_U64_PSEUDO
227
228; GCN-LABEL: @ssub64ri
229; GCN:       s_sub_u32  s{{[0-9]+}}, 0x56789876, s{{[0-9]+}}
230; GCN:       s_subb_u32 s{{[0-9]+}}, 0x1234, s{{[0-9]+}}
; 64-bit subtract, immediate - register: subtracts the i64 kernel argument %a
; from a 64-bit constant (the constant is the left-hand operand) and stores the
; difference through %out. The checks before this function expect the
; constant's halves as the first source operand of s_sub_u32 (0x56789876) and
; s_subb_u32 (0x1234).
231define amdgpu_kernel void @ssub64ri(i64 addrspace(1)* %out, i64 %a) {
232entry:
  ; 20015998343286 == 0x0000123456789876 (high half 0x1234, low half 0x56789876).
233  %sub = sub i64 20015998343286, %a
234  store i64 %sub, i64 addrspace(1)* %out
235  ret void
236}
237
238; GCN-ISEL-LABEL: name:   vsub64rr
239; GCN-ISEL-LABEL: body:
240; GCN-ISEL-LABEL: bb.0.entry:
241; GCN-ISEL: V_SUB_U64_PSEUDO
242
243; GCN-LABEL: @vsub64rr
244;
245; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
246; CISI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
247;
248; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
249; VI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
250;
251; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
252; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
253;
254; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
255; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v{{[0-9]+}}
256; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
257; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
258;
259; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
260; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
; 64-bit subtract where the subtrahend comes from the workitem id: computes
; %a - sext(llvm.amdgcn.workitem.id.x) and stores the difference through %out.
; The checks before this function expect V_SUB_U64_PSEUDO after isel and a
; per-target VALU subtract / subtract-with-borrow pair in the final code.
261define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
262entry:
263  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Widen the 32-bit workitem id to 64 bits (signed) before subtracting it.
264  %tid.ext = sext i32 %tid to i64
265  %sub = sub i64 %a, %tid.ext
266  store i64 %sub, i64 addrspace(1)* %out
267  ret void
268}
269
270; GCN-ISEL-LABEL: name:   vsub64ri
271; GCN-ISEL-LABEL: body:
272; GCN-ISEL-LABEL: bb.0.entry:
273; GCN-ISEL: V_SUB_U64_PSEUDO
274
275; GCN-LABEL: @vsub64ri
276;
277; CISI:	v_sub_i32_e32 v0, vcc, 0x56789876, v0
278; CISI:	v_mov_b32_e32 v1, 0x1234
279; CISI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
280;
281; VI: v_sub_u32_e32 v0, vcc, 0x56789876, v0
282; VI: v_mov_b32_e32 v1, 0x1234
283; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
284;
285; GFX9:	v_sub_co_u32_e32 v0, vcc, 0x56789876, v0
286; GFX9: v_mov_b32_e32 v1, 0x1234
287; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
288;
289; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
290; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], 0x56789876, v{{[0-9]+}}
291; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
292; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]]
293;
294; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
295; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]]
; 64-bit subtract of a workitem-id-derived value from a constant: computes
; constant - sext(llvm.amdgcn.workitem.id.x) (the constant is the left-hand
; operand) and stores the difference through %out. The checks before this
; function expect V_SUB_U64_PSEUDO after isel, with the constant's low half
; (0x56789876) in the subtract and its high half (0x1234) in the
; subtract-with-borrow.
296define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
297entry:
298  %tid = call i32 @llvm.amdgcn.workitem.id.x()
299  %tid.ext = sext i32 %tid to i64
  ; 20015998343286 == 0x0000123456789876 (high half 0x1234, low half 0x56789876).
300  %sub = sub i64 20015998343286, %tid.ext
301  store i64 %sub, i64 addrspace(1)* %out
302  ret void
303}
304
305; GCN-ISEL-LABEL: name:   susubo32
306; GCN-ISEL-LABEL: body:
307; GCN-ISEL-LABEL: bb.0
308; GCN-ISEL: S_SUB_I32
; 32-bit unsigned subtract-with-overflow whose overflow bit is dead: calls
; llvm.usub.with.overflow.i32 on the kernel arguments %a and %b and stores only
; the difference (struct element 0) through %out. %carry is extracted but never
; used, and the %carryout pointer argument is not written. The isel check
; before this function expects S_SUB_I32; there are no final-assembly checks
; for it.
309define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
310  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
311  %val = extractvalue { i32, i1 } %usub, 0
  ; Overflow flag: extracted here but intentionally left without any user.
312  %carry = extractvalue { i32, i1 } %usub, 1
313  store i32 %val, i32 addrspace(1)* %out, align 4
314  ret void
315}
316
317
318; GCN-ISEL-LABEL: name:   usubo32_vcc_user
319; GCN-ISEL-LABEL: body:
320; GCN-ISEL-LABEL: bb.0
321; GCN-ISEL: V_SUB_CO_U32_e64
322
323; Below we check selection to v_sub/subb
324; because the only user of VCC produced by the USUBO is v_cndmask.
325; We select to VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.
326
327; GCN-LABEL: @usubo32_vcc_user
328;
329; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
330; CISI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
331;
332; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
333; VI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
334;
335; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
336; GFX9:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
337;
338; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
339; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, s{{[0-9]+}}
340; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
341;
342; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
343; GFX11: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
; 32-bit unsigned subtract-with-overflow where the overflow bit is live: calls
; llvm.usub.with.overflow.i32 on the kernel arguments %a and %b, stores the
; difference (struct element 0) through %out and the overflow flag (struct
; element 1) through %carryout. The checks before this function expect
; V_SUB_CO_U32_e64 after isel and a VALU subtract followed by v_cndmask_b32
; consuming the carry.
344define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
345  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
346  %val = extractvalue { i32, i1 } %usub, 0
347  %carry = extractvalue { i32, i1 } %usub, 1
348  store i32 %val, i32 addrspace(1)* %out, align 4
  ; Storing the i1 overflow flag is the only use of the carry in this kernel.
349  store i1 %carry, i1 addrspace(1)* %carryout
350  ret void
351}
352
353; GCN-ISEL-LABEL: name:   susubo64
354; GCN-ISEL-LABEL: body:
355; GCN-ISEL-LABEL: bb.0
356; GCN-ISEL: S_SUB_U64_PSEUDO
357
358; GCN-LABEL: @susubo64
359;
360; GCN: s_sub_u32
361; GCN: s_subb_u32
; 64-bit unsigned subtract-with-overflow on two kernel arguments: calls
; llvm.usub.with.overflow.i64 on %a and %b, stores the difference (struct
; element 0) through %out and the overflow flag (struct element 1) through
; %carryout. The checks before this function expect S_SUB_U64_PSEUDO after isel
; and an s_sub_u32 / s_subb_u32 pair in the final code.
362define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
363  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
364  %val = extractvalue { i64, i1 } %usub, 0
365  %carry = extractvalue { i64, i1 } %usub, 1
366  store i64 %val, i64 addrspace(1)* %out, align 8
367  store i1 %carry, i1 addrspace(1)* %carryout
368  ret void
369}
370
371; GCN-ISEL-LABEL: name:   vusubo64
372; GCN-ISEL-LABEL: body:
373; GCN-ISEL-LABEL: bb.0
374; GCN-ISEL: V_SUB_U64_PSEUDO
375
376; GCN-LABEL: @vusubo64
377;
378; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
379; CISI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
380;
381; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
382; VI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
383;
384; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
385; GFX9:	v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
386;
387; GFX10W32: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
388; GFX10W64: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
389; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
390; GFX1030: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
391;
392; GFX11: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
393; GFX11: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
; 64-bit unsigned subtract-with-overflow where the subtrahend comes from the
; workitem id: calls llvm.usub.with.overflow.i64 on the kernel argument %a and
; the sign-extended result of llvm.amdgcn.workitem.id.x, then stores the
; difference through %out and the overflow flag through %carryout. The checks
; before this function expect V_SUB_U64_PSEUDO after isel and a per-target VALU
; subtract / subtract-with-borrow pair whose first subtract reads v0.
394define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
395  %tid = call i32 @llvm.amdgcn.workitem.id.x()
396  %tid.ext = sext i32 %tid to i64
397  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %tid.ext)
398  %val = extractvalue { i64, i1 } %usub, 0
399  %carry = extractvalue { i64, i1 } %usub, 1
400  store i64 %val, i64 addrspace(1)* %out, align 8
401  store i1 %carry, i1 addrspace(1)* %carryout
402  ret void
403}
404
405; GCN-ISEL-LABEL: name:   sudiv64
406; GCN-ISEL-LABEL: body:
407; GCN-ISEL-LABEL: bb.3
408; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64
409; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, killed %{{[0-9]+}}, killed %[[CARRY]]
410; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64
411; GCN-ISEL: S_SUB_CO_PSEUDO killed %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
; 64-bit unsigned division of the kernel arguments: computes %x / %y with udiv
; and stores the quotient through %out. Only isel-level checks precede this
; function: in machine block bb.3 they expect a V_ADD_CO_U32_e64 whose carry
; output feeds an S_ADD_CO_PSEUDO, and a V_SUB_CO_U32_e64 whose carry output
; feeds an S_SUB_CO_PSEUDO.
412define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
413  %result = udiv i64 %x, %y
414  store i64 %result, i64 addrspace(1)* %out
415  ret void
416}
417
418
419
420declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
421
422declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
423
424declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
425
426declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
427
428declare i32 @llvm.amdgcn.workitem.id.x() #1
429
430attributes #0 = { nounwind }
431attributes #1 = { nounwind readnone }
432
433