1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5
; Plain i8 load: reads one i8 through %in and stores it unchanged through %out
; (both pointers are addrspace(3)). The GCN runs must show an m0 write followed
; by ds_read_u8, with no s_wqm_b64 ahead of the m0 write; the Evergreen run must
; show LDS_UBYTE_READ_RET.
6; FUNC-LABEL: {{^}}local_load_i8:
7; GCN-NOT: s_wqm_b64
8; GCN: s_mov_b32 m0
9; GCN: ds_read_u8
10
11; EG: LDS_UBYTE_READ_RET
12define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
13entry:
14  %ld = load i8, i8 addrspace(3)* %in
15  store i8 %ld, i8 addrspace(3)* %out
16  ret void
17}
18
; <2 x i8> load/store round trip between two addrspace(3) pointers.
; The GCN runs must show an m0 write followed by ds_read_u16 (no s_wqm_b64
; ahead of the m0 write); the Evergreen run must show LDS_USHORT_READ_RET.
19; FUNC-LABEL: {{^}}local_load_v2i8:
20; GCN-NOT: s_wqm_b64
21; GCN: s_mov_b32 m0
22; GCN: ds_read_u16
23
24; EG: LDS_USHORT_READ_RET
25define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
26entry:
27  %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
28  store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
29  ret void
30}
31
; <3 x i8> load/store round trip between two addrspace(3) pointers.
; The GCN runs must show ds_read_b32. The Evergreen check names the full
; LDS_READ_RET mnemonic, as the other tests in this file do, rather than
; relying on a substring match of a truncated name.
32; FUNC-LABEL: {{^}}local_load_v3i8:
33; GCN: ds_read_b32
34
35; EG: LDS_READ_RET
36define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
37entry:
38  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
39  store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
40  ret void
41}
42
; <4 x i8> load/store round trip between two addrspace(3) pointers.
; The GCN runs must show ds_read_b32; the Evergreen run must show LDS_READ_RET.
43; FUNC-LABEL: {{^}}local_load_v4i8:
44; GCN: ds_read_b32
45
46; EG: LDS_READ_RET
47define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
48entry:
49  %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
50  store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
51  ret void
52}
53
; <8 x i8> load/store round trip between two addrspace(3) pointers.
; The GCN runs must show ds_read_b64; the Evergreen run must show two
; LDS_READ_RET in order.
54; FUNC-LABEL: {{^}}local_load_v8i8:
55; GCN: ds_read_b64
56
57; EG: LDS_READ_RET
58; EG: LDS_READ_RET
59define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
60entry:
61  %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
62  store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
63  ret void
64}
65
; <16 x i8> load/store round trip between two addrspace(3) pointers.
; The GCN checks capture the first and last register of the ds_read2_b64 result
; (LO / HI) and require the ds_write2_b64 to use LO as the start of its first
; data range and HI as the end of its second. The Evergreen run must show four
; LDS_READ_RET in order.
66; FUNC-LABEL: {{^}}local_load_v16i8:
67; GCN: ds_read2_b64  v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
68; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}
69
70; EG: LDS_READ_RET
71; EG: LDS_READ_RET
72; EG: LDS_READ_RET
73; EG: LDS_READ_RET
74define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
75entry:
76  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
77  store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
78  ret void
79}
80
; Loads an i8 through %in (addrspace(3)), zero-extends it to i32 and stores the
; result through %out (addrspace(3)). The GCN runs must show an m0 write and
; then ds_read_u8 (no s_wqm_b64 ahead of the m0 write); the Evergreen run must
; show LDS_UBYTE_READ_RET.
81; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
82; GCN-NOT: s_wqm_b64
83; GCN: s_mov_b32 m0
84; GCN: ds_read_u8
85
86; EG: LDS_UBYTE_READ_RET
87define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
88  %a = load i8, i8 addrspace(3)* %in
89  %ext = zext i8 %a to i32
90  store i32 %ext, i32 addrspace(3)* %out
91  ret void
92}
93
; Loads an i8 through %in (addrspace(3)), sign-extends it to i32 and stores the
; result through %out (addrspace(3)). The GCN runs must show an m0 write and
; then ds_read_i8; the Evergreen run must show LDS_UBYTE_READ_RET followed by
; BFE_INT.
94; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
95; GCN-NOT: s_wqm_b64
96; GCN: s_mov_b32 m0
97; GCN: ds_read_i8
98
99; EG: LDS_UBYTE_READ_RET
100; EG: BFE_INT
101define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
102  %ld = load i8, i8 addrspace(3)* %in
103  %ext = sext i8 %ld to i32
104  store i32 %ext, i32 addrspace(3)* %out
105  ret void
106}
107
; <1 x i8> load zero-extended to <1 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label
; (LDS_UBYTE_READ_RET).
108; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
109
110; EG: LDS_UBYTE_READ_RET
111define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
112  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
113  %ext = zext <1 x i8> %load to <1 x i32>
114  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
115  ret void
116}
117
; <1 x i8> load sign-extended to <1 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label
; (LDS_UBYTE_READ_RET, then BFE_INT).
118; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
119
120; EG: LDS_UBYTE_READ_RET
121; EG: BFE_INT
122define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
123  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
124  %ext = sext <1 x i8> %load to <1 x i32>
125  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
126  ret void
127}
128
; <2 x i8> load zero-extended to <2 x i32>, both through addrspace(3) pointers.
; The GCN runs must show ds_read_u16; the Evergreen run must show
; LDS_USHORT_READ_RET.
129; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
130; GCN: ds_read_u16
131
132; EG: LDS_USHORT_READ_RET
133define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
134  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
135  %ext = zext <2 x i8> %load to <2 x i32>
136  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
137  ret void
138}
139
; <2 x i8> load sign-extended to <2 x i32>, both through addrspace(3) pointers.
; All GCN runs must show an m0 write and then ds_read_u16. The SI run then
; expects two v_bfe_i32 (operands 8, 8 and 0, 8) in any order; the VI run
; expects a v_lshrrev_b16 by 8 whose result feeds one of two v_bfe_i32 0, 8.
; The Evergreen run expects LDS_USHORT_READ_RET and two BFE_INT.
140; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
141; GCN-NOT: s_wqm_b64
142; GCN: s_mov_b32 m0
143; GCN: ds_read_u16
144; FIXME: Need to optimize this sequence to avoid extra shift on VI.
145;         t23: i16 = srl t39, Constant:i32<8>
146;          t31: i32 = any_extend t23
147;        t33: i32 = sign_extend_inreg t31, ValueType:ch:i8
148
149; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
150; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
151
152; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
153; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
154; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
155
156; EG: LDS_USHORT_READ_RET
157; EG-DAG: BFE_INT
158; EG-DAG: BFE_INT
159define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
160  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
161  %ext = sext <2 x i8> %load to <2 x i32>
162  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
163  ret void
164}
165
; <3 x i8> load zero-extended to <3 x i32>, both through addrspace(3) pointers.
; All GCN runs must show ds_read_b32, a v_bfe_u32 with operands 16, 8 and a
; v_and_b32 with 0xff. The remaining byte is checked per target: v_bfe_u32 8, 8
; on SI, v_lshrrev_b16 by 8 on VI. The Evergreen run must show LDS_READ_RET.
166; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
167; GCN: ds_read_b32
168
169; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
170; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}}
171; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
172; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
173
174; EG: LDS_READ_RET
175define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
176entry:
177  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
178  %ext = zext <3 x i8> %ld to <3 x i32>
179  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
180  ret void
181}
182
; <3 x i8> load sign-extended to <3 x i32>, both through addrspace(3) pointers.
; The GCN runs must show an m0 write, ds_read_b32, four v_bfe_i32, and a
; ds_write_b64 plus a ds_write_b32 for the result. The Evergreen run must show
; LDS_READ_RET and three BFE_INT.
; NOTE(review): four v_bfe_i32 are expected for three result lanes; presumably
; the vector is widened to four lanes during lowering. Confirm against llc output.
183; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
184; GCN-NOT: s_wqm_b64
185; GCN: s_mov_b32 m0
186; GCN: ds_read_b32
187
188; GCN-DAG: v_bfe_i32
189; GCN-DAG: v_bfe_i32
190; GCN-DAG: v_bfe_i32
191; GCN-DAG: v_bfe_i32
192
193; GCN-DAG: ds_write_b64
194; GCN-DAG: ds_write_b32
195
196; EG: LDS_READ_RET
197; EG-DAG: BFE_INT
198; EG-DAG: BFE_INT
199; EG-DAG: BFE_INT
200define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
201entry:
202  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
203  %ext = sext <3 x i8> %ld to <3 x i32>
204  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
205  ret void
206}
207
; <4 x i8> load zero-extended to <4 x i32>, both through addrspace(3) pointers.
; The GCN runs must show an m0 write and then ds_read_b32; the Evergreen run
; must show LDS_READ_RET and three BFE_UINT.
208; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
209; GCN-NOT: s_wqm_b64
210; GCN: s_mov_b32 m0
211; GCN: ds_read_b32
212
213; EG: LDS_READ_RET
214; EG-DAG: BFE_UINT
215; EG-DAG: BFE_UINT
216; EG-DAG: BFE_UINT
217define void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
218  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
219  %ext = zext <4 x i8> %load to <4 x i32>
220  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
221  ret void
222}
223
; <4 x i8> load sign-extended to <4 x i32>, both through addrspace(3) pointers.
; The GCN runs must show an m0 write and then ds_read_b32; the Evergreen run
; must show LDS_READ_RET and four BFE_INT, in any order.
224; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
225; GCN-NOT: s_wqm_b64
226; GCN: s_mov_b32 m0
227; GCN: ds_read_b32
228
229; EG-DAG: LDS_READ_RET
230; EG-DAG: BFE_INT
231; EG-DAG: BFE_INT
232; EG-DAG: BFE_INT
233; EG-DAG: BFE_INT
234define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
235  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
236  %ext = sext <4 x i8> %load to <4 x i32>
237  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
238  ret void
239}
240
; <8 x i8> load zero-extended to <8 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET and six BFE_UINT, in any order.
241; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
242
243; EG-DAG: LDS_READ_RET
244; EG-DAG: LDS_READ_RET
245; EG-DAG: BFE_UINT
246; EG-DAG: BFE_UINT
247; EG-DAG: BFE_UINT
248; EG-DAG: BFE_UINT
249; EG-DAG: BFE_UINT
250; EG-DAG: BFE_UINT
251define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
252  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
253  %ext = zext <8 x i8> %load to <8 x i32>
254  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
255  ret void
256}
257
; <8 x i8> load sign-extended to <8 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET and eight BFE_INT, in any order.
258; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
259
260; EG-DAG: LDS_READ_RET
261; EG-DAG: LDS_READ_RET
262; EG-DAG: BFE_INT
263; EG-DAG: BFE_INT
264; EG-DAG: BFE_INT
265; EG-DAG: BFE_INT
266; EG-DAG: BFE_INT
267; EG-DAG: BFE_INT
268; EG-DAG: BFE_INT
269; EG-DAG: BFE_INT
270define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
271  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
272  %ext = sext <8 x i8> %load to <8 x i32>
273  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
274  ret void
275}
276
; <16 x i8> load zero-extended to <16 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET and twelve BFE_UINT, in any order.
277; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
278
279; EG-DAG: LDS_READ_RET
280; EG-DAG: LDS_READ_RET
281; EG-DAG: LDS_READ_RET
282; EG-DAG: LDS_READ_RET
283; EG-DAG: BFE_UINT
284; EG-DAG: BFE_UINT
285; EG-DAG: BFE_UINT
286; EG-DAG: BFE_UINT
287; EG-DAG: BFE_UINT
288; EG-DAG: BFE_UINT
289; EG-DAG: BFE_UINT
290; EG-DAG: BFE_UINT
291; EG-DAG: BFE_UINT
292; EG-DAG: BFE_UINT
293; EG-DAG: BFE_UINT
294; EG-DAG: BFE_UINT
295define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
296  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
297  %ext = zext <16 x i8> %load to <16 x i32>
298  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
299  ret void
300}
301
; <16 x i8> load sign-extended to <16 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET and sixteen BFE_INT, in any order.
302; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
303
304; EG-DAG: LDS_READ_RET
305; EG-DAG: LDS_READ_RET
306; EG-DAG: LDS_READ_RET
307; EG-DAG: LDS_READ_RET
308; EG-DAG: BFE_INT
309; EG-DAG: BFE_INT
310; EG-DAG: BFE_INT
311; EG-DAG: BFE_INT
312; EG-DAG: BFE_INT
313; EG-DAG: BFE_INT
314; EG-DAG: BFE_INT
315; EG-DAG: BFE_INT
316; EG-DAG: BFE_INT
317; EG-DAG: BFE_INT
318; EG-DAG: BFE_INT
319; EG-DAG: BFE_INT
320; EG-DAG: BFE_INT
321; EG-DAG: BFE_INT
322; EG-DAG: BFE_INT
323; EG-DAG: BFE_INT
324define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
325  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
326  %ext = sext <16 x i8> %load to <16 x i32>
327  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
328  ret void
329}
330
; <32 x i8> load zero-extended to <32 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET. The extension instructions are not checked.
331; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
332
333; EG-DAG: LDS_READ_RET
334; EG-DAG: LDS_READ_RET
335; EG-DAG: LDS_READ_RET
336; EG-DAG: LDS_READ_RET
337; EG-DAG: LDS_READ_RET
338; EG-DAG: LDS_READ_RET
339; EG-DAG: LDS_READ_RET
340; EG-DAG: LDS_READ_RET
341define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
342  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
343  %ext = zext <32 x i8> %load to <32 x i32>
344  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
345  ret void
346}
347
; <32 x i8> load sign-extended to <32 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET. The extension instructions are not checked.
348; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
349
350; EG-DAG: LDS_READ_RET
351; EG-DAG: LDS_READ_RET
352; EG-DAG: LDS_READ_RET
353; EG-DAG: LDS_READ_RET
354; EG-DAG: LDS_READ_RET
355; EG-DAG: LDS_READ_RET
356; EG-DAG: LDS_READ_RET
357; EG-DAG: LDS_READ_RET
358define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
359  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
360  %ext = sext <32 x i8> %load to <32 x i32>
361  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
362  ret void
363}
364
; <64 x i8> load zero-extended to <64 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: sixteen
; LDS_READ_RET. The extension instructions are not checked.
365; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
366
367; EG-DAG: LDS_READ_RET
368; EG-DAG: LDS_READ_RET
369; EG-DAG: LDS_READ_RET
370; EG-DAG: LDS_READ_RET
371; EG-DAG: LDS_READ_RET
372; EG-DAG: LDS_READ_RET
373; EG-DAG: LDS_READ_RET
374; EG-DAG: LDS_READ_RET
375; EG-DAG: LDS_READ_RET
376; EG-DAG: LDS_READ_RET
377; EG-DAG: LDS_READ_RET
378; EG-DAG: LDS_READ_RET
379; EG-DAG: LDS_READ_RET
380; EG-DAG: LDS_READ_RET
381; EG-DAG: LDS_READ_RET
382; EG-DAG: LDS_READ_RET
383define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
384  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
385  %ext = zext <64 x i8> %load to <64 x i32>
386  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
387  ret void
388}
389
; <64 x i8> load sign-extended to <64 x i32>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: sixteen
; LDS_READ_RET. The extension instructions are not checked.
390; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
391
392; EG-DAG: LDS_READ_RET
393; EG-DAG: LDS_READ_RET
394; EG-DAG: LDS_READ_RET
395; EG-DAG: LDS_READ_RET
396; EG-DAG: LDS_READ_RET
397; EG-DAG: LDS_READ_RET
398; EG-DAG: LDS_READ_RET
399; EG-DAG: LDS_READ_RET
400; EG-DAG: LDS_READ_RET
401; EG-DAG: LDS_READ_RET
402; EG-DAG: LDS_READ_RET
403; EG-DAG: LDS_READ_RET
404; EG-DAG: LDS_READ_RET
405; EG-DAG: LDS_READ_RET
406; EG-DAG: LDS_READ_RET
407; EG-DAG: LDS_READ_RET
408define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
409  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
410  %ext = sext <64 x i8> %load to <64 x i32>
411  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
412  ret void
413}
414
; Loads an i8 through %in (addrspace(3)), zero-extends it to i64 and stores the
; result through %out (addrspace(3)). The GCN checks capture the ds_read_u8
; destination as LO and a register set to 0 as HI, then require ds_write_b64 to
; store the LO:HI pair. The Evergreen run must show LDS_UBYTE_READ_RET, a MOV
; from a literal, and the text 0.0.
415; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
416; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
417; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
418; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
419
420; EG: LDS_UBYTE_READ_RET
421; EG: MOV {{.*}}, literal
422; EG: 0.0
423define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
424  %a = load i8, i8 addrspace(3)* %in
425  %ext = zext i8 %a to i64
426  store i64 %ext, i64 addrspace(3)* %out
427  ret void
428}
429
; Loads an i8 through %in (addrspace(3)), sign-extends it to i64 and stores the
; result through %out (addrspace(3)). The GCN checks capture the ds_read_i8
; destination as LO, require HI to come from v_ashrrev_i32 by 31 of LO, and
; require ds_write_b64 to store the LO:HI pair. The Evergreen run must show
; LDS_UBYTE_READ_RET, ASHR, and the text 31.
430; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
431; GCN: ds_read_i8 v[[LO:[0-9]+]],
432; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
433
434; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
435
436; EG: LDS_UBYTE_READ_RET
437; EG: ASHR
438; TODO: why not 7?
439; EG: 31
440define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
441  %a = load i8, i8 addrspace(3)* %in
442  %ext = sext i8 %a to i64
443  store i64 %ext, i64 addrspace(3)* %out
444  ret void
445}
446
; <1 x i8> load zero-extended to <1 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_UBYTE_READ_RET, a MOV from a literal, and the text 0.0.
447; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
448
449; EG: LDS_UBYTE_READ_RET
450; EG: MOV {{.*}}, literal
451; TODO: merge?
452; EG: 0.0
453define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
454  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
455  %ext = zext <1 x i8> %load to <1 x i64>
456  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
457  ret void
458}
459
; <1 x i8> load sign-extended to <1 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_UBYTE_READ_RET, ASHR, and the text 31.
460; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
461
462; EG: LDS_UBYTE_READ_RET
463; EG: ASHR
464; TODO: why not 7?
465; EG: 31
466define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
467  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
468  %ext = sext <1 x i8> %load to <1 x i64>
469  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
470  ret void
471}
472
; <2 x i8> load zero-extended to <2 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label
; (LDS_USHORT_READ_RET).
473; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
474
475; EG: LDS_USHORT_READ_RET
476define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
477  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
478  %ext = zext <2 x i8> %load to <2 x i64>
479  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
480  ret void
481}
482
; <2 x i8> load sign-extended to <2 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_USHORT_READ_RET followed by two BFE_INT, in order.
483; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
484
485; EG: LDS_USHORT_READ_RET
486; EG: BFE_INT
487; EG: BFE_INT
488define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
489  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
490  %ext = sext <2 x i8> %load to <2 x i64>
491  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
492  ret void
493}
494
; <4 x i8> load zero-extended to <4 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label (LDS_READ_RET).
495; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
496
497; EG: LDS_READ_RET
498define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
499  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
500  %ext = zext <4 x i8> %load to <4 x i64>
501  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
502  ret void
503}
504
; <4 x i8> load sign-extended to <4 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label (LDS_READ_RET).
505; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
506
507; EG: LDS_READ_RET
508define void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
509  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
510  %ext = sext <4 x i8> %load to <4 x i64>
511  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
512  ret void
513}
514
; <8 x i8> load zero-extended to <8 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET in order.
515; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
516
517; EG: LDS_READ_RET
518; EG: LDS_READ_RET
519define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
520  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
521  %ext = zext <8 x i8> %load to <8 x i64>
522  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
523  ret void
524}
525
; <8 x i8> load sign-extended to <8 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET in order, then two ASHR and seven BFE_INT in any order.
526; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
527
528; EG: LDS_READ_RET
529; EG: LDS_READ_RET
530; EG-DAG: ASHR
531; EG-DAG: ASHR
532; EG-DAG: BFE_INT
533; EG-DAG: BFE_INT
534; EG-DAG: BFE_INT
535; EG-DAG: BFE_INT
536; EG-DAG: BFE_INT
537; EG-DAG: BFE_INT
538; EG-DAG: BFE_INT
539define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
540  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
541  %ext = sext <8 x i8> %load to <8 x i64>
542  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
543  ret void
544}
545
; <16 x i8> load zero-extended to <16 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET in order.
546; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
547
548; EG: LDS_READ_RET
549; EG: LDS_READ_RET
550; EG: LDS_READ_RET
551; EG: LDS_READ_RET
552define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
553  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
554  %ext = zext <16 x i8> %load to <16 x i64>
555  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
556  ret void
557}
558
; <16 x i8> load sign-extended to <16 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET in order.
559; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
560
561; EG: LDS_READ_RET
562; EG: LDS_READ_RET
563; EG: LDS_READ_RET
564; EG: LDS_READ_RET
565define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
566  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
567  %ext = sext <16 x i8> %load to <16 x i64>
568  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
569  ret void
570}
571
; <32 x i8> load zero-extended to <32 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET in order.
572; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
573
574; EG: LDS_READ_RET
575; EG: LDS_READ_RET
576; EG: LDS_READ_RET
577; EG: LDS_READ_RET
578; EG: LDS_READ_RET
579; EG: LDS_READ_RET
580; EG: LDS_READ_RET
581; EG: LDS_READ_RET
582define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
583  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
584  %ext = zext <32 x i8> %load to <32 x i64>
585  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
586  ret void
587}
588
; <32 x i8> load sign-extended to <32 x i64>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET in order.
589; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
590
591; EG: LDS_READ_RET
592; EG: LDS_READ_RET
593; EG: LDS_READ_RET
594; EG: LDS_READ_RET
595; EG: LDS_READ_RET
596; EG: LDS_READ_RET
597; EG: LDS_READ_RET
598; EG: LDS_READ_RET
599define void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
600  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
601  %ext = sext <32 x i8> %load to <32 x i64>
602  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
603  ret void
604}
605
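; NOTE(review): the two <64 x i8> to <64 x i64> tests below are disabled. Their
; IR is commented out and XFUNC-LABEL is not a check prefix used by this file's
; run lines, so nothing in them is checked. The reason is not recorded here.
606; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64: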
607; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
608;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
609;   %ext = zext <64 x i8> %load to <64 x i64>
610;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
611;   ret void
612; }
613
614; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
615; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
616;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
617;   %ext = sext <64 x i8> %load to <64 x i64>
618;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
619;   ret void
620; }
621
; Loads an i8 through %in (addrspace(3)), zero-extends it to i16 and stores the
; result through %out (addrspace(3)). The GCN checks capture the ds_read_u8
; destination as VAL and require ds_write_b16 to store that same register as
; its data operand; the address operand may be any VGPR. The Evergreen run must
; show LDS_UBYTE_READ_RET followed by LDS_SHORT_WRITE.
622; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
623; GCN: ds_read_u8 v[[VAL:[0-9]+]],
624; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
625
626; EG: LDS_UBYTE_READ_RET
627; EG: LDS_SHORT_WRITE
628define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
629  %a = load i8, i8 addrspace(3)* %in
630  %ext = zext i8 %a to i16
631  store i16 %ext, i16 addrspace(3)* %out
632  ret void
633}
634
; Loads an i8 through %in (addrspace(3)), sign-extends it to i16 and stores the
; result through %out (addrspace(3)). The GCN checks capture the ds_read_i8
; destination as VAL and require ds_write_b16 to store that register. The
; Evergreen run must show LDS_UBYTE_READ_RET, BFE_INT, then LDS_SHORT_WRITE.
635; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
636; GCN: ds_read_i8 v[[VAL:[0-9]+]],
637; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
638
639; EG: LDS_UBYTE_READ_RET
640; EG: BFE_INT
641; EG: LDS_SHORT_WRITE
642define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
643  %a = load i8, i8 addrspace(3)* %in
644  %ext = sext i8 %a to i16
645  store i16 %ext, i16 addrspace(3)* %out
646  ret void
647}
648
; <1 x i8> load zero-extended to <1 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_UBYTE_READ_RET followed by LDS_SHORT_WRITE.
649; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
650
651; EG: LDS_UBYTE_READ_RET
652; EG: LDS_SHORT_WRITE
653define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
654  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
655  %ext = zext <1 x i8> %load to <1 x i16>
656  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
657  ret void
658}
659
; <1 x i8> load sign-extended to <1 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_UBYTE_READ_RET, BFE_INT, then LDS_SHORT_WRITE.
660; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
661
662; EG: LDS_UBYTE_READ_RET
663; EG: BFE_INT
664; EG: LDS_SHORT_WRITE
665define void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
666  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
667  %ext = sext <1 x i8> %load to <1 x i16>
668  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
669  ret void
670}
671
; <2 x i8> load zero-extended to <2 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_USHORT_READ_RET followed by LDS_WRITE.
672; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
673
674; EG: LDS_USHORT_READ_RET
675; EG: LDS_WRITE
676define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
677  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
678  %ext = zext <2 x i8> %load to <2 x i16>
679  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
680  ret void
681}
682
; <2 x i8> load sign-extended to <2 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label:
; LDS_USHORT_READ_RET, two BFE_INT, then LDS_WRITE, in that order.
683; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
684
685; EG: LDS_USHORT_READ_RET
686; EG: BFE_INT
687; EG: BFE_INT
688; EG: LDS_WRITE
689define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
690  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
691  %ext = sext <2 x i8> %load to <2 x i16>
692  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
693  ret void
694}
695
; <4 x i8> load zero-extended to <4 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: one
; LDS_READ_RET followed by two LDS_WRITE.
696; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
697
698; EG: LDS_READ_RET
699; EG: LDS_WRITE
700; EG: LDS_WRITE
701define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
702  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
703  %ext = zext <4 x i8> %load to <4 x i16>
704  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
705  ret void
706}
707
; <4 x i8> load sign-extended to <4 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: one
; LDS_READ_RET, then three BFE_INT and one ASHR in any order, then two
; LDS_WRITE.
708; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
709
710; EG: LDS_READ_RET
711; EG-DAG: BFE_INT
712; EG-DAG: BFE_INT
713; EG-DAG: BFE_INT
714; EG-DAG: ASHR
715; EG: LDS_WRITE
716; EG: LDS_WRITE
717define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
718  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
719  %ext = sext <4 x i8> %load to <4 x i16>
720  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
721  ret void
722}
723
; <8 x i8> load zero-extended to <8 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET followed by four LDS_WRITE.
724; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
725
726; EG: LDS_READ_RET
727; EG: LDS_READ_RET
728; EG: LDS_WRITE
729; EG: LDS_WRITE
730; EG: LDS_WRITE
731; EG: LDS_WRITE
732define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
733  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
734  %ext = zext <8 x i8> %load to <8 x i16>
735  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
736  ret void
737}
738
; <8 x i8> load sign-extended to <8 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: two
; LDS_READ_RET, then six BFE_INT and two ASHR in any order, then four
; LDS_WRITE.
739; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
740
741; EG: LDS_READ_RET
742; EG: LDS_READ_RET
743; EG-DAG: BFE_INT
744; EG-DAG: BFE_INT
745; EG-DAG: BFE_INT
746; EG-DAG: BFE_INT
747; EG-DAG: BFE_INT
748; EG-DAG: BFE_INT
749; EG-DAG: ASHR
750; EG-DAG: ASHR
751; EG: LDS_WRITE
752; EG: LDS_WRITE
753; EG: LDS_WRITE
754; EG: LDS_WRITE
755define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
756  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
757  %ext = sext <8 x i8> %load to <8 x i16>
758  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
759  ret void
760}
761
; <16 x i8> load zero-extended to <16 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET followed by eight LDS_WRITE.
762; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
763
764; EG: LDS_READ_RET
765; EG: LDS_READ_RET
766; EG: LDS_READ_RET
767; EG: LDS_READ_RET
768; EG: LDS_WRITE
769; EG: LDS_WRITE
770; EG: LDS_WRITE
771; EG: LDS_WRITE
772; EG: LDS_WRITE
773; EG: LDS_WRITE
774; EG: LDS_WRITE
775; EG: LDS_WRITE
776define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
777  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
778  %ext = zext <16 x i8> %load to <16 x i16>
779  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
780  ret void
781}
782
; <16 x i8> load sign-extended to <16 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: four
; LDS_READ_RET, then twelve BFE_INT and four ASHR in any order, then eight
; LDS_WRITE.
783; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
784
785; EG: LDS_READ_RET
786; EG: LDS_READ_RET
787; EG: LDS_READ_RET
788; EG: LDS_READ_RET
789; EG-DAG: BFE_INT
790; EG-DAG: BFE_INT
791; EG-DAG: BFE_INT
792; EG-DAG: BFE_INT
793; EG-DAG: BFE_INT
794; EG-DAG: BFE_INT
795; EG-DAG: BFE_INT
796; EG-DAG: BFE_INT
797; EG-DAG: BFE_INT
798; EG-DAG: BFE_INT
799; EG-DAG: BFE_INT
800; EG-DAG: BFE_INT
801; EG-DAG: ASHR
802; EG-DAG: ASHR
803; EG-DAG: ASHR
804; EG-DAG: ASHR
805; EG: LDS_WRITE
806; EG: LDS_WRITE
807; EG: LDS_WRITE
808; EG: LDS_WRITE
809; EG: LDS_WRITE
810; EG: LDS_WRITE
811; EG: LDS_WRITE
812; EG: LDS_WRITE
813define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
814  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
815  %ext = sext <16 x i8> %load to <16 x i16>
816  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
817  ret void
818}
819
; <32 x i8> load zero-extended to <32 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET followed by sixteen LDS_WRITE.
820; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
821
822; EG: LDS_READ_RET
823; EG: LDS_READ_RET
824; EG: LDS_READ_RET
825; EG: LDS_READ_RET
826; EG: LDS_READ_RET
827; EG: LDS_READ_RET
828; EG: LDS_READ_RET
829; EG: LDS_READ_RET
830; EG: LDS_WRITE
831; EG: LDS_WRITE
832; EG: LDS_WRITE
833; EG: LDS_WRITE
834; EG: LDS_WRITE
835; EG: LDS_WRITE
836; EG: LDS_WRITE
837; EG: LDS_WRITE
838; EG: LDS_WRITE
839; EG: LDS_WRITE
840; EG: LDS_WRITE
841; EG: LDS_WRITE
842; EG: LDS_WRITE
843; EG: LDS_WRITE
844; EG: LDS_WRITE
845; EG: LDS_WRITE
846define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
847  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
848  %ext = zext <32 x i8> %load to <32 x i16>
849  store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
850  ret void
851}
852
; <32 x i8> load sign-extended to <32 x i16>, both through addrspace(3) pointers.
; Only the Evergreen output is checked beyond the function label: eight
; LDS_READ_RET, then twenty-four BFE_INT and eight ASHR in any order, then
; sixteen LDS_WRITE.
853; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
854
855; EG: LDS_READ_RET
856; EG: LDS_READ_RET
857; EG: LDS_READ_RET
858; EG: LDS_READ_RET
859; EG: LDS_READ_RET
860; EG: LDS_READ_RET
861; EG: LDS_READ_RET
862; EG: LDS_READ_RET
863; EG-DAG: BFE_INT
864; EG-DAG: BFE_INT
865; EG-DAG: BFE_INT
866; EG-DAG: BFE_INT
867; EG-DAG: BFE_INT
868; EG-DAG: BFE_INT
869; EG-DAG: BFE_INT
870; EG-DAG: BFE_INT
871; EG-DAG: BFE_INT
872; EG-DAG: BFE_INT
873; EG-DAG: BFE_INT
874; EG-DAG: BFE_INT
875; EG-DAG: BFE_INT
876; EG-DAG: BFE_INT
877; EG-DAG: BFE_INT
878; EG-DAG: BFE_INT
879; EG-DAG: BFE_INT
880; EG-DAG: BFE_INT
881; EG-DAG: BFE_INT
882; EG-DAG: BFE_INT
883; EG-DAG: BFE_INT
884; EG-DAG: BFE_INT
885; EG-DAG: BFE_INT
886; EG-DAG: BFE_INT
887; EG-DAG: ASHR
888; EG-DAG: ASHR
889; EG-DAG: ASHR
890; EG-DAG: ASHR
891; EG-DAG: ASHR
892; EG-DAG: ASHR
893; EG-DAG: ASHR
894; EG-DAG: ASHR
895; EG: LDS_WRITE
896; EG: LDS_WRITE
897; EG: LDS_WRITE
898; EG: LDS_WRITE
899; EG: LDS_WRITE
900; EG: LDS_WRITE
901; EG: LDS_WRITE
902; EG: LDS_WRITE
903; EG: LDS_WRITE
904; EG: LDS_WRITE
905; EG: LDS_WRITE
906; EG: LDS_WRITE
907; EG: LDS_WRITE
908; EG: LDS_WRITE
909; EG: LDS_WRITE
910; EG: LDS_WRITE
911define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
912  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
913  %ext = sext <32 x i8> %load to <32 x i16>
914  store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
915  ret void
916}
917
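; NOTE(review): the two <64 x i8> to <64 x i16> tests below are disabled. Their
; IR is commented out and XFUNC-LABEL is not a check prefix used by this file's
; run lines, so nothing in them is checked. The reason is not recorded here.
918; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16: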
919; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
920;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
921;   %ext = zext <64 x i8> %load to <64 x i16>
922;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
923;   ret void
924; }
925
926; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
927; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
928;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
929;   %ext = sext <64 x i8> %load to <64 x i16>
930;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
931;   ret void
932; }
933
; Attribute group #0 (nounwind), referenced by the function definitions above.
934attributes #0 = { nounwind }
935