; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,FUNC %s

; Scalar i32 add of two values loaded from consecutive addresses; the result
; should be computed with s_add_i32 and moved to a VGPR only for the store.
; FUNC-LABEL: {{^}}s_add_i32:
; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
; GCN: buffer_store_dword v[[V_REG]],
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> add should be scalarized into two s_add_i32 instructions.
; FUNC-LABEL: {{^}}s_add_v2i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> add should be scalarized into four s_add_i32 instructions.
; FUNC-LABEL: {{^}}s_add_v4i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i32> add of kernel arguments should be scalarized into eight s_add_i32s.
; FUNC-LABEL: {{^}}s_add_v8i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i32> add of kernel arguments should be scalarized into sixteen s_add_i32s.
; FUNC-LABEL: {{^}}s_add_v16i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; Divergent (per-workitem) i32 add: both operands come from VGPR loads, so the
; VALU add form for each target generation should be selected.
; FUNC-LABEL: {{^}}v_add_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Divergent i32 add with a 32-bit literal (123 = 0x7b) that does not fit in an
; inline constant, so it must be emitted as a literal operand.
; FUNC-LABEL: {{^}}v_add_imm_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Uniform i64 add of kernel arguments: expanded into a scalar add/add-with-carry pair.
; FUNC-LABEL: {{^}}add64:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they
; use VCC.  The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; GCN-NOT: v_addc_u32_e32 s
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

; Make sure the VOP3 form of add is initially selected. Otherwise a pair
; of copies from/to VCC would be necessary, since VCC is held live across
; the add by the inline asm.

; NOTE(review): the SI and VI prefixes below are not listed in any RUN line's
; -check-prefixes (only SIVI is), so those two lines are currently never
; checked — confirm whether SI/VI should be added to the RUN lines.

; GCN-LABEL: {{^}}add_select_vop3:
; SI: v_add_i32_e64 v0, s[0:1], s0, v0
; VI: v_add_u32_e64 v0, s[0:1], s0, v0
; GFX9: v_add_u32_e32 v0, s0, v0
; GFX10: v_add_nc_u32_e32 v0, s0, v0

; GCN: ; def vcc
; GCN: ds_write_b32
; GCN: ; use vcc
define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
  %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()
  %sub = add i32 %v, %s
  store i32 %sub, i32 addrspace(3)* undef
  call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
