1; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GCN,MESA-GCN,FUNC
2; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC
3; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,HSA-VI,FUNC
4; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
5; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
6
; Test: an i8 kernel argument with no extension attribute is zero-extended to
; i32 in the body and stored to %out. The Mesa GCN runs expect a scalar dword
; load of the argument followed by a mask with 0xff; the amdhsa run expects a
; flat unsigned-byte load from the address s[4:5] + 8.
7; FUNC-LABEL: {{^}}i8_arg:
8; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
9; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
10; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
11; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
12; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
13; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
14; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
15; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
16; FIXME: Should be using s_load_dword
17; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
18
19define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
20entry:
21  %0 = zext i8 %in to i32
22  store i32 %0, i32 addrspace(1)* %out, align 4
23  ret void
24}
25
; Test: an i8 kernel argument marked zeroext is zero-extended to i32 and
; stored. The Mesa GCN runs expect only a scalar dword load (offset 0xb on the
; default target, 0x2c on tonga); the amdhsa run expects a flat unsigned-byte
; load from s[4:5] + 8.
26; FUNC-LABEL: {{^}}i8_zext_arg:
27; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
28; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
29; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
30; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
31; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
32; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
33; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
34; FIXME: Should be using s_load_dword
35; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
36
37define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
38entry:
39  %0 = zext i8 %in to i32
40  store i32 %0, i32 addrspace(1)* %out, align 4
41  ret void
42}
43
; Test: an i8 kernel argument marked signext is sign-extended to i32 and
; stored. The Mesa GCN runs expect only a scalar dword load; the amdhsa run
; expects a flat signed-byte load from s[4:5] + 8.
44; FUNC-LABEL: {{^}}i8_sext_arg:
45; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
46; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
47; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
48; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
49; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
50; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
51; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
52; FIXME: Should be using s_load_dword
53; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
54
55define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
56entry:
57  %0 = sext i8 %in to i32
58  store i32 %0, i32 addrspace(1)* %out, align 4
59  ret void
60}
61
; Test: an i16 kernel argument with no extension attribute is zero-extended to
; i32 in the body and stored to %out. The Mesa GCN runs expect a scalar dword
; load of the argument followed by a mask; the amdhsa run expects a flat
; unsigned-short load from the address s[4:5] + 8.
; The mask is checked as the full 16-bit constant 0xffff. FileCheck matches
; substrings, so a pattern of 0xff would also accept an incorrect 8-bit mask.
62; FUNC-LABEL: {{^}}i16_arg:
63; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
64; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
65; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
66; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
67; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
68; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
69; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
70; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
71; FIXME: Should be using s_load_dword
72; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
73
74define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
75entry:
76  %0 = zext i16 %in to i32
77  store i32 %0, i32 addrspace(1)* %out, align 4
78  ret void
79}
80
; Test: an i16 kernel argument marked zeroext is zero-extended to i32 and
; stored. The Mesa GCN runs expect only a scalar dword load; the amdhsa run
; expects a flat unsigned-short load from s[4:5] + 8.
81; FUNC-LABEL: {{^}}i16_zext_arg:
82; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
83; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
84; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
85; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
86; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
87; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
88; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
89; FIXME: Should be using s_load_dword
90; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
91
92define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
93entry:
94  %0 = zext i16 %in to i32
95  store i32 %0, i32 addrspace(1)* %out, align 4
96  ret void
97}
98
; Test: an i16 kernel argument marked signext is sign-extended to i32 and
; stored. The Mesa GCN runs expect only a scalar dword load; the amdhsa run
; expects a flat signed-short load from s[4:5] + 8.
99; FUNC-LABEL: {{^}}i16_sext_arg:
100; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
101; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
102; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
103; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
104; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
105; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
106; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
107; FIXME: Should be using s_load_dword
108; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
109
110define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
111entry:
112  %0 = sext i16 %in to i32
113  store i32 %0, i32 addrspace(1)* %out, align 4
114  ret void
115}
116
; Test: an i32 kernel argument is stored unchanged to %out. Every GCN run
; expects one scalar dword load of the argument (Mesa at offset 0xb or 0x2c
; from s[0:1], amdhsa at offset 0x8 from s[4:5]).
117; FUNC-LABEL: {{^}}i32_arg:
118; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
119; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
120; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
121; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8
122define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
123entry:
124  store i32 %in, i32 addrspace(1)* %out, align 4
125  ret void
126}
127
; Test: a float kernel argument is stored unchanged to %out. The expected
; loads are the same as for the i32 case (one scalar dword load per GCN run).
128; FUNC-LABEL: {{^}}f32_arg:
129; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
130; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
131; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
132; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
133define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
134entry:
135  store float %in, float addrspace(1)* %out, align 4
136  ret void
137}
138
; Test: a <2 x i8> kernel argument is stored to %out. Each run expects two
; byte-sized loads (VTX_READ_8 on r600, buffer_load_ubyte on Mesa GCN,
; flat_load_ubyte on amdhsa).
139; FUNC-LABEL: {{^}}v2i8_arg:
140; EG: VTX_READ_8
141; EG: VTX_READ_8
142; MESA-GCN: buffer_load_ubyte
143; MESA-GCN: buffer_load_ubyte
144; HSA-VI: flat_load_ubyte
145; HSA-VI: flat_load_ubyte
146define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
147entry:
148  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
149  ret void
150}
151
; Test: a <2 x i16> kernel argument is stored to %out. Each run expects two
; 16-bit loads (VTX_READ_16 on r600, buffer_load_ushort on Mesa GCN,
; flat_load_ushort on amdhsa).
152; FUNC-LABEL: {{^}}v2i16_arg:
153; EG: VTX_READ_16
154; EG: VTX_READ_16
155; MESA-GCN: buffer_load_ushort
156; MESA-GCN: buffer_load_ushort
157; HSA-VI: flat_load_ushort
158; HSA-VI: flat_load_ushort
159define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
160entry:
161  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
162  ret void
163}
164
; Test: a <2 x i32> kernel argument is stored to %out. The GCN runs expect a
; single two-dword scalar load; the r600 runs expect reads of KC0[2].W and
; KC0[3].X in either order.
165; FUNC-LABEL: {{^}}v2i32_arg:
166; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
167; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
168; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
169; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
170; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
171define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
172entry:
173  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
174  ret void
175}
176
; Test: a <2 x float> kernel argument is stored to %out. The expectations
; mirror the <2 x i32> case (one two-dword scalar load on each GCN run).
177; FUNC-LABEL: {{^}}v2f32_arg:
178; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
179; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
180; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
181; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
182; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
183define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
184entry:
185  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
186  ret void
187}
188
; Test: a <3 x i8> kernel argument is stored to %out. Each run expects three
; byte loads; the r600 runs additionally pin the read offsets to 40, 41 and 42.
189; FUNC-LABEL: {{^}}v3i8_arg:
190; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
191; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
192; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
193; MESA-GCN: buffer_load_ubyte
194; MESA-GCN: buffer_load_ubyte
195; MESA-GCN: buffer_load_ubyte
196; HSA-VI: flat_load_ubyte
197; HSA-VI: flat_load_ubyte
198; HSA-VI: flat_load_ubyte
199define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
200entry:
201  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
202  ret void
203}
204
; Test: a <3 x i16> kernel argument is stored to %out. Each run expects three
; 16-bit loads; the r600 runs additionally pin the read offsets to 44, 46
; and 48.
205; FUNC-LABEL: {{^}}v3i16_arg:
206; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
207; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
208; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
209; MESA-GCN: buffer_load_ushort
210; MESA-GCN: buffer_load_ushort
211; MESA-GCN: buffer_load_ushort
212; HSA-VI: flat_load_ushort
213; HSA-VI: flat_load_ushort
214; HSA-VI: flat_load_ushort
215define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
216entry:
217  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
218  ret void
219}
; Test: a <3 x i32> kernel argument is stored to %out. The GCN runs expect a
; four-dword scalar load (offsets 0xd, 0x34 and 0x10 respectively); the r600
; runs expect reads of KC0[3].Y, KC0[3].Z and KC0[3].W in any order.
220; FUNC-LABEL: {{^}}v3i32_arg:
221; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
222; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
223; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
224; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
225; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
226; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
227define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
228entry:
229  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
230  ret void
231}
232
; Test: a <3 x float> kernel argument is stored to %out. The expectations
; mirror the <3 x i32> case (one four-dword scalar load on each GCN run).
233; FUNC-LABEL: {{^}}v3f32_arg:
234; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
235; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
236; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
237; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
238; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
239; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
240define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
241entry:
242  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
243  ret void
244}
245
; Test: a <4 x i8> kernel argument is stored to %out. Each run expects four
; byte-sized loads, one per element.
246; FUNC-LABEL: {{^}}v4i8_arg:
247; EG: VTX_READ_8
248; EG: VTX_READ_8
249; EG: VTX_READ_8
250; EG: VTX_READ_8
251; MESA-GCN: buffer_load_ubyte
252; MESA-GCN: buffer_load_ubyte
253; MESA-GCN: buffer_load_ubyte
254; MESA-GCN: buffer_load_ubyte
255; HSA-VI: flat_load_ubyte
256; HSA-VI: flat_load_ubyte
257; HSA-VI: flat_load_ubyte
258; HSA-VI: flat_load_ubyte
259define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
260entry:
261  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
262  ret void
263}
264
; Test: a <4 x i16> kernel argument is stored to %out. Each run expects four
; 16-bit loads, one per element.
; The amdhsa expectations use the HSA-VI prefix, the one enabled by the amdhsa
; RUN line. A prefix that no RUN line enables is silently never checked.
265; FUNC-LABEL: {{^}}v4i16_arg:
266; EG: VTX_READ_16
267; EG: VTX_READ_16
268; EG: VTX_READ_16
269; EG: VTX_READ_16
270; MESA-GCN: buffer_load_ushort
271; MESA-GCN: buffer_load_ushort
272; MESA-GCN: buffer_load_ushort
273; MESA-GCN: buffer_load_ushort
274; HSA-VI: flat_load_ushort
275; HSA-VI: flat_load_ushort
276; HSA-VI: flat_load_ushort
277; HSA-VI: flat_load_ushort
278define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
279entry:
280  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
281  ret void
282}
283
; Test: a <4 x i32> kernel argument is stored to %out. The GCN runs expect a
; single four-dword scalar load; the r600 runs expect reads of KC0[3].Y
; through KC0[4].X in any order.
284; FUNC-LABEL: {{^}}v4i32_arg:
285; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
286; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
287; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
288; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
289; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
290; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
291; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
292define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
293entry:
294  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
295  ret void
296}
297
; Test: a <4 x float> kernel argument is stored to %out. The expectations
; mirror the <4 x i32> case (one four-dword scalar load on each GCN run).
298; FUNC-LABEL: {{^}}v4f32_arg:
299; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
300; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
301; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
302; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
303; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
304; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
305; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
306define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
307entry:
308  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
309  ret void
310}
311
; Test: an <8 x i8> kernel argument is stored to %out. Each run expects eight
; byte-sized loads, one per element.
; The amdhsa expectations use the HSA-VI prefix (the one enabled by the amdhsa
; RUN line) and the flat_load_ubyte mnemonic, matching the other i8 vector
; tests in this file. The Mesa expectations list all eight loads.
312; FUNC-LABEL: {{^}}v8i8_arg:
313; EG: VTX_READ_8
314; EG: VTX_READ_8
315; EG: VTX_READ_8
316; EG: VTX_READ_8
317; EG: VTX_READ_8
318; EG: VTX_READ_8
319; EG: VTX_READ_8
320; EG: VTX_READ_8
321; MESA-GCN: buffer_load_ubyte
322; MESA-GCN: buffer_load_ubyte
323; MESA-GCN: buffer_load_ubyte
324; MESA-GCN: buffer_load_ubyte
325; MESA-GCN: buffer_load_ubyte
326; MESA-GCN: buffer_load_ubyte
327; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
328; HSA-VI: flat_load_ubyte
329; HSA-VI: flat_load_ubyte
330; HSA-VI: flat_load_ubyte
331; HSA-VI: flat_load_ubyte
332; HSA-VI: flat_load_ubyte
333; HSA-VI: flat_load_ubyte
334; HSA-VI: flat_load_ubyte
335; HSA-VI: flat_load_ubyte
336define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
337entry:
338  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
339  ret void
340}
341
; Test: an <8 x i16> kernel argument is stored to %out. Each run expects
; eight 16-bit loads, one per element.
342; FUNC-LABEL: {{^}}v8i16_arg:
343; EG: VTX_READ_16
344; EG: VTX_READ_16
345; EG: VTX_READ_16
346; EG: VTX_READ_16
347; EG: VTX_READ_16
348; EG: VTX_READ_16
349; EG: VTX_READ_16
350; EG: VTX_READ_16
351; MESA-GCN: buffer_load_ushort
352; MESA-GCN: buffer_load_ushort
353; MESA-GCN: buffer_load_ushort
354; MESA-GCN: buffer_load_ushort
355; MESA-GCN: buffer_load_ushort
356; MESA-GCN: buffer_load_ushort
357; MESA-GCN: buffer_load_ushort
358; MESA-GCN: buffer_load_ushort
359; HSA-VI: flat_load_ushort
360; HSA-VI: flat_load_ushort
361; HSA-VI: flat_load_ushort
362; HSA-VI: flat_load_ushort
363; HSA-VI: flat_load_ushort
364; HSA-VI: flat_load_ushort
365; HSA-VI: flat_load_ushort
366; HSA-VI: flat_load_ushort
367define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
368entry:
369  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
370  ret void
371}
372
; Test: an <8 x i32> kernel argument is stored to %out. The GCN runs expect a
; single eight-dword scalar load (offsets 0x11, 0x44 and 0x20 respectively);
; the r600 runs expect reads of KC0[4].Y through KC0[6].X in any order.
373; FUNC-LABEL: {{^}}v8i32_arg:
374; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
375; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
376; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
377; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
378; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
379; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
380; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
381; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
382; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
383; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
384; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
385define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
386entry:
387  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
388  ret void
389}
390
; Test: an <8 x float> kernel argument is stored to %out. The GCN runs expect
; a single eight-dword scalar load; the r600 runs expect reads of KC0[4].Y
; through KC0[6].X in any order.
; The tonga and amdhsa expectations mirror the <8 x i32> test, which has the
; same argument layout, so all three GCN runs are covered here as well.
391; FUNC-LABEL: {{^}}v8f32_arg:
392; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
393; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
394; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
395; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
396; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
397; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
398; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
399; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
400; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
401define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
402entry:
403  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
404  ret void
405}
406
; Test: a <16 x i8> kernel argument is stored to %out. Each run expects
; sixteen byte-sized loads, one per element.
407; FUNC-LABEL: {{^}}v16i8_arg:
408; EG: VTX_READ_8
409; EG: VTX_READ_8
410; EG: VTX_READ_8
411; EG: VTX_READ_8
412; EG: VTX_READ_8
413; EG: VTX_READ_8
414; EG: VTX_READ_8
415; EG: VTX_READ_8
416; EG: VTX_READ_8
417; EG: VTX_READ_8
418; EG: VTX_READ_8
419; EG: VTX_READ_8
420; EG: VTX_READ_8
421; EG: VTX_READ_8
422; EG: VTX_READ_8
423; EG: VTX_READ_8
424; MESA-GCN: buffer_load_ubyte
425; MESA-GCN: buffer_load_ubyte
426; MESA-GCN: buffer_load_ubyte
427; MESA-GCN: buffer_load_ubyte
428; MESA-GCN: buffer_load_ubyte
429; MESA-GCN: buffer_load_ubyte
430; MESA-GCN: buffer_load_ubyte
431; MESA-GCN: buffer_load_ubyte
432; MESA-GCN: buffer_load_ubyte
433; MESA-GCN: buffer_load_ubyte
434; MESA-GCN: buffer_load_ubyte
435; MESA-GCN: buffer_load_ubyte
436; MESA-GCN: buffer_load_ubyte
437; MESA-GCN: buffer_load_ubyte
438; MESA-GCN: buffer_load_ubyte
439; MESA-GCN: buffer_load_ubyte
440; HSA-VI: flat_load_ubyte
441; HSA-VI: flat_load_ubyte
442; HSA-VI: flat_load_ubyte
443; HSA-VI: flat_load_ubyte
444; HSA-VI: flat_load_ubyte
445; HSA-VI: flat_load_ubyte
446; HSA-VI: flat_load_ubyte
447; HSA-VI: flat_load_ubyte
448; HSA-VI: flat_load_ubyte
449; HSA-VI: flat_load_ubyte
450; HSA-VI: flat_load_ubyte
451; HSA-VI: flat_load_ubyte
452; HSA-VI: flat_load_ubyte
453; HSA-VI: flat_load_ubyte
454; HSA-VI: flat_load_ubyte
455; HSA-VI: flat_load_ubyte
456define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
457entry:
458  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
459  ret void
460}
461
; Test: a <16 x i16> kernel argument is stored to %out. Each run expects
; sixteen 16-bit loads, one per element.
462; FUNC-LABEL: {{^}}v16i16_arg:
463; EG: VTX_READ_16
464; EG: VTX_READ_16
465; EG: VTX_READ_16
466; EG: VTX_READ_16
467; EG: VTX_READ_16
468; EG: VTX_READ_16
469; EG: VTX_READ_16
470; EG: VTX_READ_16
471; EG: VTX_READ_16
472; EG: VTX_READ_16
473; EG: VTX_READ_16
474; EG: VTX_READ_16
475; EG: VTX_READ_16
476; EG: VTX_READ_16
477; EG: VTX_READ_16
478; EG: VTX_READ_16
479; MESA-GCN: buffer_load_ushort
480; MESA-GCN: buffer_load_ushort
481; MESA-GCN: buffer_load_ushort
482; MESA-GCN: buffer_load_ushort
483; MESA-GCN: buffer_load_ushort
484; MESA-GCN: buffer_load_ushort
485; MESA-GCN: buffer_load_ushort
486; MESA-GCN: buffer_load_ushort
487; MESA-GCN: buffer_load_ushort
488; MESA-GCN: buffer_load_ushort
489; MESA-GCN: buffer_load_ushort
490; MESA-GCN: buffer_load_ushort
491; MESA-GCN: buffer_load_ushort
492; MESA-GCN: buffer_load_ushort
493; MESA-GCN: buffer_load_ushort
494; MESA-GCN: buffer_load_ushort
495; HSA-VI: flat_load_ushort
496; HSA-VI: flat_load_ushort
497; HSA-VI: flat_load_ushort
498; HSA-VI: flat_load_ushort
499; HSA-VI: flat_load_ushort
500; HSA-VI: flat_load_ushort
501; HSA-VI: flat_load_ushort
502; HSA-VI: flat_load_ushort
503; HSA-VI: flat_load_ushort
504; HSA-VI: flat_load_ushort
505; HSA-VI: flat_load_ushort
506; HSA-VI: flat_load_ushort
507; HSA-VI: flat_load_ushort
508; HSA-VI: flat_load_ushort
509; HSA-VI: flat_load_ushort
510; HSA-VI: flat_load_ushort
511define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
512entry:
513  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
514  ret void
515}
516
; Test: a <16 x i32> kernel argument is stored to %out. The GCN runs expect a
; single sixteen-dword scalar load (offsets 0x19, 0x64 and 0x40
; respectively); the r600 runs expect reads of KC0[6].Y through KC0[10].X in
; any order.
517; FUNC-LABEL: {{^}}v16i32_arg:
518; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
519; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
520; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
521; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
522; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
523; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
524; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
525; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
526; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
527; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
528; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
529; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
530; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
531; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
532; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
533; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
534; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
535; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
536; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
537define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
538entry:
539  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
540  ret void
541}
542
; Test: a <16 x float> kernel argument is stored to %out. The expectations
; mirror the <16 x i32> case (one sixteen-dword scalar load on each GCN run).
543; FUNC-LABEL: {{^}}v16f32_arg:
544; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
545; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
546; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
547; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
548; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
549; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
550; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
551; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
552; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
553; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
554; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
555; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
556; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
557; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
558; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
559; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
560; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
561; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
562; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
563define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
564entry:
565  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
566  ret void
567}
568
; Test: an i64 kernel argument is stored to %out with 8-byte alignment. The
; Mesa GCN runs expect two two-dword scalar loads followed by a two-dword
; buffer store; the amdhsa run expects a two-dword scalar load at offset 0x8.
569; FUNC-LABEL: {{^}}kernel_arg_i64:
570; MESA-GCN: s_load_dwordx2
571; MESA-GCN: s_load_dwordx2
572; MESA-GCN: buffer_store_dwordx2
573; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
574define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
575  store i64 %a, i64 addrspace(1)* %out, align 8
576  ret void
577}
578
; Test: a double kernel argument is stored to %out. The Mesa GCN runs expect
; two two-dword scalar loads in either order (offsets 0x9 and 0xb on the
; default target, 0x24 and 0x2c on tonga) and a two-dword buffer store; the
; amdhsa run expects a two-dword scalar load at offset 0x8.
579; FUNC-LABEL: {{^}}f64_kernel_arg:
580; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
581; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
582; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
583; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
584; MESA-GCN: buffer_store_dwordx2
585; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
586define void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
587entry:
588  store double %in, double addrspace(1)* %out
589  ret void
590}
591
592; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
593; XGCN: s_load_dwordx2
594; XGCN: s_load_dwordx2
595; XGCN: buffer_store_dwordx2
596; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
597;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
598;   ret void
599; }
600
; Test: an i1 kernel argument is stored to an i1 pointer. Only the
; default-target run has expectations here, namely a byte load, a vector AND,
; a byte store, and the end of the program.
601; FUNC-LABEL: {{^}}i1_arg:
602; SI: buffer_load_ubyte
603; SI: v_and_b32_e32
604; SI: buffer_store_byte
605; SI: s_endpgm
606define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
607  store i1 %x, i1 addrspace(1)* %out, align 1
608  ret void
609}
610
; Test: an i1 kernel argument is zero-extended to i32 and stored. Only the
; default-target run has expectations, namely a byte load followed by a
; dword store.
611; FUNC-LABEL: {{^}}i1_arg_zext_i32:
612; SI: buffer_load_ubyte
613; SI: buffer_store_dword
614; SI: s_endpgm
615define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
616  %ext = zext i1 %x to i32
617  store i32 %ext, i32 addrspace(1)* %out, align 4
618  ret void
619}
620
; Test: an i1 kernel argument is zero-extended to i64 and stored. Only the
; default-target run has expectations, namely a byte load followed by a
; two-dword store.
621; FUNC-LABEL: {{^}}i1_arg_zext_i64:
622; SI: buffer_load_ubyte
623; SI: buffer_store_dwordx2
624; SI: s_endpgm
625define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
626  %ext = zext i1 %x to i64
627  store i64 %ext, i64 addrspace(1)* %out, align 8
628  ret void
629}
630
; Test: an i1 kernel argument is sign-extended to i32 and stored. Only the
; default-target run has expectations, namely a byte load followed by a
; dword store.
631; FUNC-LABEL: {{^}}i1_arg_sext_i32:
632; SI: buffer_load_ubyte
633; SI: buffer_store_dword
634; SI: s_endpgm
635define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
636  %ext = sext i1 %x to i32
637  store i32 %ext, i32 addrspace(1)* %out, align 4
638  ret void
639}
640
; Test: an i1 kernel argument is sign-extended to i64 and stored. Only the
; default-target run has expectations, namely a byte load, a signed bitfield
; extract, an arithmetic shift right, and a two-dword store.
641; FUNC-LABEL: {{^}}i1_arg_sext_i64:
642; SI: buffer_load_ubyte
643; SI: v_bfe_i32
644; SI: v_ashrrev_i32
645; SI: buffer_store_dwordx2
646; SI: s_endpgm
647define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
648  %ext = sext i1 %x to i64
649  store i64 %ext, i64 addrspace(1)* %out, align 8
650  ret void
651}
652