1; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
3; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
4; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
5; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
6
7
8; FUNC-LABEL: {{^}}global_load_i8:
9; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
10; GCN-HSA: flat_load_ubyte
11
12; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
13; TODO: NOT AND
; Round-trips a single i8: load it from %in and store the same value to %out.
define amdgpu_kernel void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
entry:
  %byte = load i8, i8 addrspace(1)* %in
  store i8 %byte, i8 addrspace(1)* %out
  ret void
}
20
21; FUNC-LABEL: {{^}}global_load_v2i8:
22; GCN-NOHSA: buffer_load_ushort v
23; GCN-HSA: flat_load_ushort v
24
25; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Round-trips a <2 x i8> vector from %in to %out with no extension.
define amdgpu_kernel void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
entry:
  %vec = load <2 x i8>, <2 x i8> addrspace(1)* %in
  store <2 x i8> %vec, <2 x i8> addrspace(1)* %out
  ret void
}
32
33; FUNC-LABEL: {{^}}global_load_v3i8:
34; GCN-NOHSA: buffer_load_dword v
35; GCN-HSA: flat_load_dword v
36
37; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Round-trips a <3 x i8> vector (a non-power-of-two element count) from %in to %out.
define amdgpu_kernel void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %vec = load <3 x i8>, <3 x i8> addrspace(1)* %in
  store <3 x i8> %vec, <3 x i8> addrspace(1)* %out
  ret void
}
44
45; FUNC-LABEL: {{^}}global_load_v4i8:
46; GCN-NOHSA: buffer_load_dword v
47; GCN-HSA: flat_load_dword v
48
49; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Round-trips a <4 x i8> vector from %in to %out with no extension.
define amdgpu_kernel void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
entry:
  %vec = load <4 x i8>, <4 x i8> addrspace(1)* %in
  store <4 x i8> %vec, <4 x i8> addrspace(1)* %out
  ret void
}
56
57; FUNC-LABEL: {{^}}global_load_v8i8:
58; GCN-NOHSA: buffer_load_dwordx2
59; GCN-HSA: flat_load_dwordx2
60
61; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Round-trips a <8 x i8> vector from %in to %out with no extension.
define amdgpu_kernel void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
entry:
  %vec = load <8 x i8>, <8 x i8> addrspace(1)* %in
  store <8 x i8> %vec, <8 x i8> addrspace(1)* %out
  ret void
}
68
69; FUNC-LABEL: {{^}}global_load_v16i8:
70; GCN-NOHSA: buffer_load_dwordx4
71
72; GCN-HSA: flat_load_dwordx4
73
74; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Round-trips a <16 x i8> vector from %in to %out with no extension.
define amdgpu_kernel void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
entry:
  %vec = load <16 x i8>, <16 x i8> addrspace(1)* %in
  store <16 x i8> %vec, <16 x i8> addrspace(1)* %out
  ret void
}
81
82; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
83; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
84; GCN-HSA: flat_load_ubyte
85
86; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Loads an i8 from %in, zero-extends it to i32, and stores the result to %out.
define amdgpu_kernel void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %narrow = load i8, i8 addrspace(1)* %in
  %wide = zext i8 %narrow to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
93
94; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
95; GCN-NOHSA: buffer_load_sbyte
96; GCN-HSA: flat_load_sbyte
97
98; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
99; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
100; EG: 8
; Loads an i8 from %in, sign-extends it to i32, and stores the result to %out.
define amdgpu_kernel void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %narrow = load i8, i8 addrspace(1)* %in
  %wide = sext i8 %narrow to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
107
108; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
109
110; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Single-element vector form: zero-extends a loaded <1 x i8> to <1 x i32>.
define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %src = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %widened = zext <1 x i8> %src to <1 x i32>
  store <1 x i32> %widened, <1 x i32> addrspace(1)* %out
  ret void
}
117
118; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:
119
120; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
121; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
122; EG: 8
; Single-element vector form: sign-extends a loaded <1 x i8> to <1 x i32>.
define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %src = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %widened = sext <1 x i8> %src to <1 x i32>
  store <1 x i32> %widened, <1 x i32> addrspace(1)* %out
  ret void
}
129
130; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
131; GCN-NOHSA: buffer_load_ushort
132; GCN-HSA: flat_load_ushort
133
134; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
135; TODO: These should use DST, but for some there are redundant MOVs
136; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
137; EG-DAG: 8
; Zero-extends each lane of a loaded <2 x i8> to i32 and stores the <2 x i32>.
define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %src = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %widened = zext <2 x i8> %src to <2 x i32>
  store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
  ret void
}
144
145; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
146; GCN-NOHSA: buffer_load_ushort
147; GCN-HSA: flat_load_ushort
148
149; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
150; TODO: These should use DST, but for some there are redundant MOVs
151; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
152; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
153; EG-DAG: 8
154; EG-DAG: 8
; Sign-extends each lane of a loaded <2 x i8> to i32 and stores the <2 x i32>.
define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %src = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %widened = sext <2 x i8> %src to <2 x i32>
  store <2 x i32> %widened, <2 x i32> addrspace(1)* %out
  ret void
}
161
162; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
163; GCN-NOHSA: buffer_load_dword v
164; GCN-HSA: flat_load_dword v
165
166; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
167; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
168; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
169; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
170
171; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
172; TODO: These should use DST, but for some there are redundant MOVs
173; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
174; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
175; EG-DAG: 8
176; EG-DAG: 8
; Zero-extends each lane of a loaded <3 x i8> to i32 and stores the <3 x i32>.
define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %src = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %widened = zext <3 x i8> %src to <3 x i32>
  store <3 x i32> %widened, <3 x i32> addrspace(1)* %out
  ret void
}
184
185; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
186; GCN-NOHSA: buffer_load_dword v
187; GCN-HSA: flat_load_dword v
188
; FIXME: Need to optimize this sequence to avoid the extra shift on VI.
190
191; t23: i16 = truncate t18
192; t49: i16 = srl t23, Constant:i32<8>
193; t57: i32 = any_extend t49
194; t58: i32 = sign_extend_inreg t57, ValueType:ch:i8
195
196; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
197; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
198; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
199; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
200; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
201
202; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
203; TODO: These should use DST, but for some there are redundant MOVs
204; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
205; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
206; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
207; EG-DAG: 8
208; EG-DAG: 8
209; EG-DAG: 8
; Sign-extends each lane of a loaded <3 x i8> to i32 and stores the <3 x i32>.
define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %src = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %widened = sext <3 x i8> %src to <3 x i32>
  store <3 x i32> %widened, <3 x i32> addrspace(1)* %out
  ret void
}
217
218; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
219; GCN-NOHSA: buffer_load_dword
220; GCN-HSA: flat_load_dword
221
222; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
223; TODO: These should use DST, but for some there are redundant MOVs
224; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
225; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
226; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
227; EG-DAG: 8
228; EG-DAG: 8
229; EG-DAG: 8
; Zero-extends each lane of a loaded <4 x i8> to i32 and stores the <4 x i32>.
define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %src = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %widened = zext <4 x i8> %src to <4 x i32>
  store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
  ret void
}
236
237; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
238; GCN-NOHSA: buffer_load_dword
239; GCN-HSA: flat_load_dword
240
241; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
242; TODO: These should use DST, but for some there are redundant MOVs
243; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
244; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
245; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
246; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
247; EG-DAG: 8
248; EG-DAG: 8
249; EG-DAG: 8
250; EG-DAG: 8
; Sign-extends each lane of a loaded <4 x i8> to i32 and stores the <4 x i32>.
define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %src = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %widened = sext <4 x i8> %src to <4 x i32>
  store <4 x i32> %widened, <4 x i32> addrspace(1)* %out
  ret void
}
257
258; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:
259
260; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
261; TODO: These should use DST, but for some there are redundant MOVs
262; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
263; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
264; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
265; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
266; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
267; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
268; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
269; EG-DAG: 8
270; EG-DAG: 8
271; EG-DAG: 8
272; EG-DAG: 8
273; EG-DAG: 8
274; EG-DAG: 8
275; EG-DAG: 8
; Zero-extends each lane of a loaded <8 x i8> to i32 and stores the <8 x i32>.
define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %src = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %widened = zext <8 x i8> %src to <8 x i32>
  store <8 x i32> %widened, <8 x i32> addrspace(1)* %out
  ret void
}
282
283; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:
284
285; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
286; TODO: These should use DST, but for some there are redundant MOVs
287; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
288; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
289; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
290; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
291; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
292; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
293; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
294; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
295; EG-DAG: 8
296; EG-DAG: 8
297; EG-DAG: 8
298; EG-DAG: 8
299; EG-DAG: 8
300; EG-DAG: 8
301; EG-DAG: 8
302; EG-DAG: 8
; Sign-extends each lane of a loaded <8 x i8> to i32 and stores the <8 x i32>.
define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %src = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %widened = sext <8 x i8> %src to <8 x i32>
  store <8 x i32> %widened, <8 x i32> addrspace(1)* %out
  ret void
}
309
310; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:
311
312; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
313; TODO: These should use DST, but for some there are redundant MOVs
314; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
315; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
316; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
317; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
318; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
319; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
320; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
321; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
322; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
323; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
324; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
325; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
326; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
327; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
328; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
329; EG-DAG: 8
330; EG-DAG: 8
331; EG-DAG: 8
332; EG-DAG: 8
333; EG-DAG: 8
334; EG-DAG: 8
335; EG-DAG: 8
336; EG-DAG: 8
337; EG-DAG: 8
338; EG-DAG: 8
339; EG-DAG: 8
340; EG-DAG: 8
341; EG-DAG: 8
342; EG-DAG: 8
343; EG-DAG: 8
; Zero-extends each lane of a loaded <16 x i8> to i32 and stores the <16 x i32>.
define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %src = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %widened = zext <16 x i8> %src to <16 x i32>
  store <16 x i32> %widened, <16 x i32> addrspace(1)* %out
  ret void
}
350
351; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:
352
353; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
354; TODO: These should use DST, but for some there are redundant MOVs
355; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
356; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
357; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
358; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
359; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
360; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
361; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
362; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
363; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
364; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
365; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
366; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
367; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
368; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
369; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
370; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
371; EG-DAG: 8
372; EG-DAG: 8
373; EG-DAG: 8
374; EG-DAG: 8
375; EG-DAG: 8
376; EG-DAG: 8
377; EG-DAG: 8
378; EG-DAG: 8
379; EG-DAG: 8
380; EG-DAG: 8
381; EG-DAG: 8
382; EG-DAG: 8
383; EG-DAG: 8
384; EG-DAG: 8
385; EG-DAG: 8
386; EG-DAG: 8
; Sign-extends each lane of a loaded <16 x i8> to i32 and stores the <16 x i32>.
define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %src = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %widened = sext <16 x i8> %src to <16 x i32>
  store <16 x i32> %widened, <16 x i32> addrspace(1)* %out
  ret void
}
393
394; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:
395
396; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
397; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
398; TODO: These should use DST, but for some there are redundant MOVs
399; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
400; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
401; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
402; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
403; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
404; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
405; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
406; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
407; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
408; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
409; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
410; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
411; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
412; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
413; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
414; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
415; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
416; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
417; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
418; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
419; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
420; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
421; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
422; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
423; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
424; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
425; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
426; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
427; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
428; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
429; EG-DAG: 8
430; EG-DAG: 8
431; EG-DAG: 8
432; EG-DAG: 8
433; EG-DAG: 8
434; EG-DAG: 8
435; EG-DAG: 8
436; EG-DAG: 8
437; EG-DAG: 8
438; EG-DAG: 8
439; EG-DAG: 8
440; EG-DAG: 8
441; EG-DAG: 8
442; EG-DAG: 8
443; EG-DAG: 8
444; EG-DAG: 8
445; EG-DAG: 8
446; EG-DAG: 8
447; EG-DAG: 8
448; EG-DAG: 8
449; EG-DAG: 8
450; EG-DAG: 8
451; EG-DAG: 8
452; EG-DAG: 8
453; EG-DAG: 8
454; EG-DAG: 8
455; EG-DAG: 8
456; EG-DAG: 8
457; EG-DAG: 8
458; EG-DAG: 8
; Zero-extends each lane of a loaded <32 x i8> to i32 and stores the <32 x i32>.
define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %src = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %widened = zext <32 x i8> %src to <32 x i32>
  store <32 x i32> %widened, <32 x i32> addrspace(1)* %out
  ret void
}
465
466; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:
467
468; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
469; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
470; TODO: These should use DST, but for some there are redundant MOVs
471; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
472; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
473; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
474; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
475; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
476; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
477; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
478; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
479; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
480; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
481; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
482; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
483; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
484; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
485; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
486; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
487; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
488; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
489; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
490; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
491; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
492; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
493; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
494; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
495; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
496; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
497; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
498; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
499; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
500; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
501; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
502; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
503; EG-DAG: 8
504; EG-DAG: 8
505; EG-DAG: 8
506; EG-DAG: 8
507; EG-DAG: 8
508; EG-DAG: 8
509; EG-DAG: 8
510; EG-DAG: 8
511; EG-DAG: 8
512; EG-DAG: 8
513; EG-DAG: 8
514; EG-DAG: 8
515; EG-DAG: 8
516; EG-DAG: 8
517; EG-DAG: 8
518; EG-DAG: 8
519; EG-DAG: 8
520; EG-DAG: 8
521; EG-DAG: 8
522; EG-DAG: 8
523; EG-DAG: 8
524; EG-DAG: 8
525; EG-DAG: 8
526; EG-DAG: 8
527; EG-DAG: 8
528; EG-DAG: 8
529; EG-DAG: 8
530; EG-DAG: 8
531; EG-DAG: 8
532; EG-DAG: 8
533; EG-DAG: 8
534; EG-DAG: 8
; Sign-extends each lane of a loaded <32 x i8> to i32 and stores the <32 x i32>.
define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %src = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %widened = sext <32 x i8> %src to <32 x i32>
  store <32 x i32> %widened, <32 x i32> addrspace(1)* %out
  ret void
}
541
542; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:
543
544; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
545; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
546; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
547; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
; Zero-extends each lane of a loaded <64 x i8> to i32 and stores the <64 x i32>.
define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %src = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %widened = zext <64 x i8> %src to <64 x i32>
  store <64 x i32> %widened, <64 x i32> addrspace(1)* %out
  ret void
}
554
555; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:
556
557; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
558; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
559; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
560; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
; Sign-extends each lane of a loaded <64 x i8> to i32 and stores the <64 x i32>.
define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %src = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %widened = sext <64 x i8> %src to <64 x i32>
  store <64 x i32> %widened, <64 x i32> addrspace(1)* %out
  ret void
}
567
568; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
569; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
570
571; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
572; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
573
574; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
575; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
576
577; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
578; EG: MOV {{.*}}, 0.0
; Loads an i8 from %in, zero-extends it to i64, and stores the result to %out.
define amdgpu_kernel void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %narrow = load i8, i8 addrspace(1)* %in
  %wide = zext i8 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
585
586; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
587; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
588; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
589; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
590
591; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
592; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
593
594; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
595; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why is the shift amount 31 rather than 7?
597; EG: 31
; Loads an i8 from %in, sign-extends it to i64, and stores the result to %out.
define amdgpu_kernel void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %narrow = load i8, i8 addrspace(1)* %in
  %wide = sext i8 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
604
605; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:
606
607; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
608; EG: MOV {{.*}}, 0.0
; Single-element vector form: zero-extends a loaded <1 x i8> to <1 x i64>.
define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %src = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %widened = zext <1 x i8> %src to <1 x i64>
  store <1 x i64> %widened, <1 x i64> addrspace(1)* %out
  ret void
}
615
616; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:
617
618; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
619; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why is the shift amount 31 rather than 7?
621; EG: 31
; Single-element vector form: sign-extends a loaded <1 x i8> to <1 x i64>.
define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %src = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %widened = sext <1 x i8> %src to <1 x i64>
  store <1 x i64> %widened, <1 x i64> addrspace(1)* %out
  ret void
}
628
629; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
630
631; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Zero-extends each lane of a loaded <2 x i8> to i64 and stores the <2 x i64>.
define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %src = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %widened = zext <2 x i8> %src to <2 x i64>
  store <2 x i64> %widened, <2 x i64> addrspace(1)* %out
  ret void
}
638
639; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
640
641; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Sign-extends each lane of a loaded <2 x i8> to i64 and stores the <2 x i64>.
define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %src = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %widened = sext <2 x i8> %src to <2 x i64>
  store <2 x i64> %widened, <2 x i64> addrspace(1)* %out
  ret void
}
648
649; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
650
651; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Zero-extends each lane of a loaded <4 x i8> to i64 and stores the <4 x i64>.
define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %src = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %widened = zext <4 x i8> %src to <4 x i64>
  store <4 x i64> %widened, <4 x i64> addrspace(1)* %out
  ret void
}
658
659; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
660
661; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; Sign-extends each lane of a loaded <4 x i8> to i64 and stores the <4 x i64>.
define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %src = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %widened = sext <4 x i8> %src to <4 x i64>
  store <4 x i64> %widened, <4 x i64> addrspace(1)* %out
  ret void
}
668
669; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
670
671; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Zero-extends each lane of a loaded <8 x i8> to i64 and stores the <8 x i64>.
define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %src = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %widened = zext <8 x i8> %src to <8 x i64>
  store <8 x i64> %widened, <8 x i64> addrspace(1)* %out
  ret void
}
678
679; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
680
681; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; Sign-extends each lane of a loaded <8 x i8> to i64 and stores the <8 x i64>.
define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %src = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %widened = sext <8 x i8> %src to <8 x i64>
  store <8 x i64> %widened, <8 x i64> addrspace(1)* %out
  ret void
}
688
689; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
690
691; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Zero-extends each lane of a loaded <16 x i8> to i64 and stores the <16 x i64>.
define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %src = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %widened = zext <16 x i8> %src to <16 x i64>
  store <16 x i64> %widened, <16 x i64> addrspace(1)* %out
  ret void
}
698
699; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
700
701; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; Sign-extends each lane of a loaded <16 x i8> to i64 and stores the <16 x i64>.
define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %src = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %widened = sext <16 x i8> %src to <16 x i64>
  store <16 x i64> %widened, <16 x i64> addrspace(1)* %out
  ret void
}
708
709; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:
710
711; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
712; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Zero-extends each lane of a loaded <32 x i8> to i64 and stores the <32 x i64>.
define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %src = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %widened = zext <32 x i8> %src to <32 x i64>
  store <32 x i64> %widened, <32 x i64> addrspace(1)* %out
  ret void
}
719
720; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:
721
722; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
723; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; Sign-extends each lane of a loaded <32 x i8> to i64 and stores the <32 x i64>.
define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %src = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %widened = sext <32 x i8> %src to <32 x i64>
  store <32 x i64> %widened, <32 x i64> addrspace(1)* %out
  ret void
}
730
731; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
732; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
733;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
734;   %ext = zext <64 x i8> %load to <64 x i64>
735;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
736;   ret void
737; }
738
739; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
740; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
741;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
742;   %ext = sext <64 x i8> %load to <64 x i64>
743;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
744;   ret void
745; }
746
747; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
748; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
749; GCN-NOHSA: buffer_store_short v[[VAL]]
750; Zero-extends a scalar i8 global load to i16; the GCN checks require the register written by the ubyte load to be the one stored by the short store.
751; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
752; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
753
754; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
755define amdgpu_kernel void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
756  %byte = load i8, i8 addrspace(1)* %in
757  %wide = zext i8 %byte to i16
758  store i16 %wide, i16 addrspace(1)* %out
759  ret void
760}
761
762; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
763; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
764; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
765; Sign-extends a scalar i8 global load to i16; GCN is expected to use an sbyte load and store that same register, R600 a VTX_READ_8 plus a BFE_INT.
766; GCN-NOHSA: buffer_store_short v[[VAL]]
767; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
768; The BFE_INT destination pattern accepts multi-digit T registers and a literal dot before the channel letter.
769; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
770; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
771define amdgpu_kernel void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
772  %a = load i8, i8 addrspace(1)* %in
773  %ext = sext i8 %a to i16
774  store i16 %ext, i16 addrspace(1)* %out
775  ret void
776}
777
778; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
779; Zero-extends a <1 x i8> global load to <1 x i16>; the only check is a single VTX_READ_8 on R600.
780; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
781define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
782  %bytes = load <1 x i8>, <1 x i8> addrspace(1)* %in
783  %wide = zext <1 x i8> %bytes to <1 x i16>
784  store <1 x i16> %wide, <1 x i16> addrspace(1)* %out
785  ret void
786}
787
788; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:
789; Sign-extends a <1 x i8> global load to <1 x i16>; R600 is expected to emit a VTX_READ_8 and one BFE_INT (destination pattern accepts multi-digit T registers and a literal dot).
790; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
791; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
792define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
793  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
794  %ext = sext <1 x i8> %load to <1 x i16>
795  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
796  ret void
797}
798
799; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
800; Zero-extends a <2 x i8> global load to <2 x i16>; the only check is a single VTX_READ_16 on R600.
801; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
802define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
803  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
804  %wide = zext <2 x i8> %bytes to <2 x i16>
805  store <2 x i16> %wide, <2 x i16> addrspace(1)* %out
806  ret void
807}
808
809; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:
810; Sign-extends a <2 x i8> global load to <2 x i16>; R600 is expected to emit a VTX_READ_16 and BFE_INTs (destination pattern accepts multi-digit T registers and a literal dot).
811; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
812; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
813; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
814define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
815  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
816  %ext = sext <2 x i8> %load to <2 x i16>
817  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
818  ret void
819}
820
821; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
822; Zero-extends a <4 x i8> global load to <4 x i16>; the only check is a single VTX_READ_32 on R600.
823; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
824define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
825  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
826  %wide = zext <4 x i8> %bytes to <4 x i16>
827  store <4 x i16> %wide, <4 x i16> addrspace(1)* %out
828  ret void
829}
830
831; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:
832; Sign-extends a <4 x i8> global load to <4 x i16>; R600 is expected to emit a VTX_READ_32 and BFE_INTs (destination pattern accepts multi-digit T registers and a literal dot).
833; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
834; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
835; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
836; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
837; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
838define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
839  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
840  %ext = sext <4 x i8> %load to <4 x i16>
841  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
842  ret void
843}
844
845; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
846; Zero-extends a <8 x i8> global load to <8 x i16>; the only check is a single VTX_READ_64 into the XY channels on R600.
847; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
848define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
849  %bytes = load <8 x i8>, <8 x i8> addrspace(1)* %in
850  %wide = zext <8 x i8> %bytes to <8 x i16>
851  store <8 x i16> %wide, <8 x i16> addrspace(1)* %out
852  ret void
853}
854
855; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:
856; Sign-extends a <8 x i8> global load to <8 x i16>; R600 is expected to emit a VTX_READ_64 and BFE_INTs (destination pattern accepts multi-digit T registers and a literal dot).
857; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
858; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
859; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
860; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
861; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
862; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
863; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
864; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
865; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
866define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
867  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
868  %ext = sext <8 x i8> %load to <8 x i16>
869  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
870  ret void
871}
872
873; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
874; Zero-extends a <16 x i8> global load to <16 x i16>; the only check is a single VTX_READ_128 on R600.
875; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
876define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
877  %bytes = load <16 x i8>, <16 x i8> addrspace(1)* %in
878  %wide = zext <16 x i8> %bytes to <16 x i16>
879  store <16 x i16> %wide, <16 x i16> addrspace(1)* %out
880  ret void
881}
882
883; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:
884; Sign-extends a <16 x i8> global load to <16 x i16>; R600 is expected to emit a VTX_READ_128 and BFE_INTs (destination pattern accepts multi-digit T registers and a literal dot).
885; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
886; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
887; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
888; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
889; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
890; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
891; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
892; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
893; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
894; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
895; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
896; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
897; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
898; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
899; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
900; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
901; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
902define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
903  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
904  %ext = sext <16 x i8> %load to <16 x i16>
905  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
906  ret void
907}
908
909; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:
910; Zero-extends a <32 x i8> global load to <32 x i16>; the R600 checks expect two VTX_READ_128 instructions with immediates 0 and 16 (presumably byte offsets).
911; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
912; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
913define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
914  %bytes = load <32 x i8>, <32 x i8> addrspace(1)* %in
915  %wide = zext <32 x i8> %bytes to <32 x i16>
916  store <32 x i16> %wide, <32 x i16> addrspace(1)* %out
917  ret void
918}
919
920; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:
921; Sign-extends a <32 x i8> global load to <32 x i16>; R600 is expected to emit two VTX_READ_128 and BFE_INTs (destination pattern accepts multi-digit T registers and a literal dot).
922; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
923; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
924; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
925; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
926; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
927; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
928; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
929; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
930; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
931; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
932; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
933; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
934; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
935; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
936; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
937; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
938; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
939; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
940; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
941; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
942; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
943; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
944; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
945; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
946; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
947; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
948; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
949; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
950; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
951; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
952; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
953; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
954; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
955; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
956define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
957  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
958  %ext = sext <32 x i8> %load to <32 x i16>
959  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
960  ret void
961}
962
963; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
964; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
965;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
966;   %ext = zext <64 x i8> %load to <64 x i16>
967;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
968;   ret void
969; }
970
971; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
972; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
973;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
974;   %ext = sext <64 x i8> %load to <64 x i16>
975;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
976;   ret void
977; }
978
979attributes #0 = { nounwind } ; attribute group referenced as #0 by the kernels above; it only sets nounwind
980