1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
; Scalar i16 round trip: load from the addrspace(3) pointer %in and store the
; value unchanged to the addrspace(3) pointer %out.
; GCN checks expect a ds_read_u16. EG checks expect the address copied from
; KC0[2].Z to feed LDS_USHORT_READ_RET, and a register copied from OQAP to be
; written by LDS_SHORT_WRITE at the address copied from KC0[2].Y.
5; FUNC-LABEL: {{^}}local_load_i16:
6; GCN: ds_read_u16 v{{[0-9]+}}
7
8; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
9; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
10; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
11; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
12; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
13define void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
14entry:
15  %ld = load i16, i16 addrspace(3)* %in
16  store i16 %ld, i16 addrspace(3)* %out
17  ret void
18}
19
; <2 x i16> round trip between two addrspace(3) pointers.
; GCN checks expect a ds_read_b32. EG checks expect LDS_READ_RET from the
; address copied out of KC0[2].Z, then LDS_WRITE of the register copied from
; OQAP to the address copied out of KC0[2].Y.
20; FUNC-LABEL: {{^}}local_load_v2i16:
21; GCN: ds_read_b32
22
23; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
24; EG: LDS_READ_RET {{.*}} [[FROM]]
25; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
26; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
27; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
28define void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
29entry:
30  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
31  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
32  ret void
33}
34
; <3 x i16> round trip between two addrspace(3) pointers.
; GCN checks expect a ds_read_b64 followed by a ds_write_b32 and a
; ds_write_b16 in either order. EG checks expect LDS_USHORT_READ_RET and
; LDS_READ_RET in either order.
35; FUNC-LABEL: {{^}}local_load_v3i16:
36; GCN: ds_read_b64
37; GCN-DAG: ds_write_b32
38; GCN-DAG: ds_write_b16
39
40; EG-DAG: LDS_USHORT_READ_RET
41; EG-DAG: LDS_READ_RET
42define void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
43entry:
44  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
45  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
46  ret void
47}
48
; <4 x i16> round trip between two addrspace(3) pointers.
; GCN checks expect a ds_read_b64; EG checks expect two LDS_READ_RET.
49; FUNC-LABEL: {{^}}local_load_v4i16:
50; GCN: ds_read_b64
51
52; EG: LDS_READ_RET
53; EG: LDS_READ_RET
54define void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
55entry:
56  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
57  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
58  ret void
59}
60
; <8 x i16> round trip between two addrspace(3) pointers.
; GCN checks expect one ds_read2_b64 whose line ends in "offset1:1" (no
; offset0 printed); EG checks expect four LDS_READ_RET.
61; FUNC-LABEL: {{^}}local_load_v8i16:
62; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
63
64; EG: LDS_READ_RET
65; EG: LDS_READ_RET
66; EG: LDS_READ_RET
67; EG: LDS_READ_RET
68define void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
69entry:
70  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
71  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
72  ret void
73}
74
; <16 x i16> round trip between two addrspace(3) pointers.
; GCN checks expect two ds_read2_b64 in either order: one ending in
; "offset1:3" and one ending in "offset0:1 offset1:2". EG checks expect eight
; LDS_READ_RET.
; NOTE(review): the offset pairing here differs from the 0/1 + 2/3 pairing the
; v16i16 extending-load tests in this file expect - presumably it mirrors the
; actual llc output; confirm before changing.
75; FUNC-LABEL: {{^}}local_load_v16i16:
76; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
77; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
78
79
80; EG: LDS_READ_RET
81; EG: LDS_READ_RET
82; EG: LDS_READ_RET
83; EG: LDS_READ_RET
84
85; EG: LDS_READ_RET
86; EG: LDS_READ_RET
87; EG: LDS_READ_RET
88; EG: LDS_READ_RET
89define void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
90entry:
91  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
92  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
93  ret void
94}
95
; Zero-extending load: i16 loaded from addrspace(3), zext to i32, stored to
; addrspace(3).
; GCN checks expect ds_read_u16 then ds_write_b32. EG checks expect
; LDS_USHORT_READ_RET from the KC0[2].Z address and a full LDS_WRITE of the
; register copied from OQAP (no separate extension instruction is checked).
96; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
97; GCN: ds_read_u16
98; GCN: ds_write_b32
99
100; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
101; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
102; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
103; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
104; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
105define void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
106  %a = load i16, i16 addrspace(3)* %in
107  %ext = zext i16 %a to i32
108  store i32 %ext, i32 addrspace(3)* %out
109  ret void
110}
111
; Sign-extending load: i16 loaded from addrspace(3), sext to i32, stored to
; addrspace(3).
; GCN checks: no s_wqm_b64 may appear before the "s_mov_b32 m0" match, which
; is then followed by ds_read_i16.
; EG checks: LDS_USHORT_READ_RET, then a BFE_INT taking a literal operand (the
; literal 16 is matched afterwards) whose result is what LDS_WRITE stores.
; NOTE(review): [[TMP]] is captured from the OQAP copy but never referenced;
; the BFE_INT source operand is matched with a wildcard instead.
112; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
113; GCN-NOT: s_wqm_b64
114; GCN: s_mov_b32 m0
115; GCN: ds_read_i16
116
117; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
118; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
119; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
120; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
121; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
122; EG: 16
123; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
124define void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
125  %a = load i16, i16 addrspace(3)* %in
126  %ext = sext i16 %a to i32
127  store i32 %ext, i32 addrspace(3)* %out
128  ret void
129}
130
; Single-element vector variant of the i16 -> i32 zero-extending load:
; <1 x i16> loaded from addrspace(3), zext to <1 x i32>, stored back.
; GCN checks expect ds_read_u16; the EG checks are the same sequence as the
; scalar zextload test (LDS_USHORT_READ_RET then LDS_WRITE of the OQAP copy).
131; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
132; GCN: ds_read_u16
133
134; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
135; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
136; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
137; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
138; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
139define void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
140  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
141  %ext = zext <1 x i16> %load to <1 x i32>
142  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
143  ret void
144}
145
; Single-element vector variant of the i16 -> i32 sign-extending load:
; <1 x i16> loaded from addrspace(3), sext to <1 x i32>, stored back.
; GCN checks expect ds_read_i16. EG checks expect LDS_USHORT_READ_RET, a
; BFE_INT with a literal operand (16 matched afterwards), and LDS_WRITE of the
; BFE_INT result. [[TMP]] is captured but not referenced by a later check.
146; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
147; GCN: ds_read_i16
148
149; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
150; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
151; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
152; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
153; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
154; EG: 16
155; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
156define void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
157  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
158  %ext = sext <1 x i16> %load to <1 x i32>
159  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
160  ret void
161}
162
; <2 x i16> loaded from addrspace(3), zext to <2 x i32>, stored to
; addrspace(3).
; GCN checks: no s_wqm_b64 before the "s_mov_b32 m0" match, then ds_read_b32.
; EG checks expect one LDS_READ_RET.
163; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
164; GCN-NOT: s_wqm_b64
165; GCN: s_mov_b32 m0
166; GCN: ds_read_b32
167
168; EG: LDS_READ_RET
169define void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
170  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
171  %ext = zext <2 x i16> %load to <2 x i32>
172  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
173  ret void
174}
175
; <2 x i16> loaded from addrspace(3), sext to <2 x i32>, stored to
; addrspace(3).
; GCN checks: no s_wqm_b64 before the "s_mov_b32 m0" match, then ds_read_b32.
; EG checks expect one LDS_READ_RET followed by two BFE_INT (one per element).
176; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
177; GCN-NOT: s_wqm_b64
178; GCN: s_mov_b32 m0
179; GCN: ds_read_b32
180
181; EG: LDS_READ_RET
182; EG: BFE_INT
183; EG: BFE_INT
184define void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
185  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
186  %ext = sext <2 x i16> %load to <2 x i32>
187  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
188  ret void
189}
190
; <3 x i16> loaded from addrspace(3), zext to <3 x i32>, stored to
; addrspace(3).
; GCN checks expect a ds_read_b64, then a ds_write_b32 and a ds_write_b64 in
; either order. EG checks expect one LDS_READ_RET.
; NOTE(review): the function name carries a doubled "local_local_" prefix,
; unlike most tests in this file; the label check and the definition agree, so
; the test is self-consistent. Unlike its neighbours this function has no #0
; attribute group.
191; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
192; GCN: ds_read_b64
193; GCN-DAG: ds_write_b32
194; GCN-DAG: ds_write_b64
195
196; EG: LDS_READ_RET
197define void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
198entry:
199  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
200  %ext = zext <3 x i16> %ld to <3 x i32>
201  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
202  ret void
203}
204
; <3 x i16> loaded from addrspace(3), sext to <3 x i32>, stored to
; addrspace(3).
; GCN checks expect a ds_read_b64, then a ds_write_b32 and a ds_write_b64 in
; either order. EG checks expect one LDS_READ_RET and three BFE_INT (one per
; element, in any order).
; NOTE(review): doubled "local_local_" prefix in the name; label check and
; definition agree. This function has no #0 attribute group.
205; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
206; GCN: ds_read_b64
207; GCN-DAG: ds_write_b32
208; GCN-DAG: ds_write_b64
209
210; EG: LDS_READ_RET
211; EG-DAG: BFE_INT
212; EG-DAG: BFE_INT
213; EG-DAG: BFE_INT
214define void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
215entry:
216  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
217  %ext = sext <3 x i16> %ld to <3 x i32>
218  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
219  ret void
220}
221
; <4 x i16> loaded from addrspace(3), zext to <4 x i32>, stored to
; addrspace(3).
; GCN checks: no s_wqm_b64 before the "s_mov_b32 m0" match, then ds_read_b64.
; EG checks expect two LDS_READ_RET.
; NOTE(review): doubled "local_local_" prefix in the name, while the matching
; sextload v4 test uses a single "local_"; label check and definition agree.
222; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
223; GCN-NOT: s_wqm_b64
224; GCN: s_mov_b32 m0
225; GCN: ds_read_b64
226
227; EG: LDS_READ_RET
228; EG: LDS_READ_RET
229define void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
230  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
231  %ext = zext <4 x i16> %load to <4 x i32>
232  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
233  ret void
234}
235
; <4 x i16> loaded from addrspace(3), sext to <4 x i32>, stored to
; addrspace(3).
; GCN checks: no s_wqm_b64 before the "s_mov_b32 m0" match, then ds_read_b64.
; EG checks expect two LDS_READ_RET and four BFE_INT (one per element, in any
; order).
236; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
237; GCN-NOT: s_wqm_b64
238; GCN: s_mov_b32 m0
239; GCN: ds_read_b64
240
241; EG: LDS_READ_RET
242; EG: LDS_READ_RET
243; EG-DAG: BFE_INT
244; EG-DAG: BFE_INT
245; EG-DAG: BFE_INT
246; EG-DAG: BFE_INT
247define void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
248  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
249  %ext = sext <4 x i16> %load to <4 x i32>
250  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
251  ret void
252}
253
; <8 x i16> loaded from addrspace(3), zext to <8 x i32>, stored to
; addrspace(3).
; GCN checks expect one ds_read2_b64 whose line ends in "offset1:1"; EG checks
; expect four LDS_READ_RET.
254; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
255; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
256
257; EG: LDS_READ_RET
258; EG: LDS_READ_RET
259; EG: LDS_READ_RET
260; EG: LDS_READ_RET
261define void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
262  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
263  %ext = zext <8 x i16> %load to <8 x i32>
264  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
265  ret void
266}
267
; <8 x i16> loaded from addrspace(3), sext to <8 x i32>, stored to
; addrspace(3).
; GCN checks expect one ds_read2_b64 whose line ends in "offset1:1"; EG checks
; expect four LDS_READ_RET and eight BFE_INT (one per element, in any order).
268; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
269; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
270
271; EG: LDS_READ_RET
272; EG: LDS_READ_RET
273; EG: LDS_READ_RET
274; EG: LDS_READ_RET
275; EG-DAG: BFE_INT
276; EG-DAG: BFE_INT
277; EG-DAG: BFE_INT
278; EG-DAG: BFE_INT
279; EG-DAG: BFE_INT
280; EG-DAG: BFE_INT
281; EG-DAG: BFE_INT
282; EG-DAG: BFE_INT
283define void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
284  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
285  %ext = sext <8 x i16> %load to <8 x i32>
286  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
287  ret void
288}
289
; <16 x i16> loaded from addrspace(3), zext to <16 x i32>, stored to
; addrspace(3).
; GCN checks expect two ds_read2_b64 in either order (ending "offset1:1" and
; "offset0:2 offset1:3"), followed by four ds_write2_b64. EG checks expect
; eight LDS_READ_RET.
290; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
291; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
292; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
293
294; GCN: ds_write2_b64
295; GCN: ds_write2_b64
296; GCN: ds_write2_b64
297; GCN: ds_write2_b64
298
299; EG: LDS_READ_RET
300; EG: LDS_READ_RET
301; EG: LDS_READ_RET
302; EG: LDS_READ_RET
303; EG: LDS_READ_RET
304; EG: LDS_READ_RET
305; EG: LDS_READ_RET
306; EG: LDS_READ_RET
307define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
308  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
309  %ext = zext <16 x i16> %load to <16 x i32>
310  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
311  ret void
312}
313
; <16 x i16> loaded from addrspace(3), sext to <16 x i32>, stored to
; addrspace(3).
; GCN checks expect two ds_read2_b64 in either order (ending "offset1:1" and
; "offset0:2 offset1:3"); no GCN write checks are present here. EG checks
; expect eight LDS_READ_RET and sixteen BFE_INT (one per element, any order).
314; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
315
316; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
317; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
318
319; EG: LDS_READ_RET
320; EG: LDS_READ_RET
321; EG: LDS_READ_RET
322; EG: LDS_READ_RET
323; EG: LDS_READ_RET
324; EG: LDS_READ_RET
325; EG: LDS_READ_RET
326; EG: LDS_READ_RET
327; EG-DAG: BFE_INT
328; EG-DAG: BFE_INT
329; EG-DAG: BFE_INT
330; EG-DAG: BFE_INT
331; EG-DAG: BFE_INT
332; EG-DAG: BFE_INT
333; EG-DAG: BFE_INT
334; EG-DAG: BFE_INT
335; EG-DAG: BFE_INT
336; EG-DAG: BFE_INT
337; EG-DAG: BFE_INT
338; EG-DAG: BFE_INT
339; EG-DAG: BFE_INT
340; EG-DAG: BFE_INT
341; EG-DAG: BFE_INT
342; EG-DAG: BFE_INT
343define void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
344  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
345  %ext = sext <16 x i16> %load to <16 x i32>
346  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
347  ret void
348}
349
; <32 x i16> loaded from addrspace(3), zext to <32 x i32>, stored to
; addrspace(3).
; GCN checks expect four ds_read2_b64 in any order, covering offset pairs
; 0/1 (printed as "offset1:1"), 2/3, 4/5 and 6/7. EG checks expect sixteen
; LDS_READ_RET.
; NOTE(review): only the first GCN pattern is anchored to end of line; the
; other three would also match a longer offset1 value with the same leading
; digit - presumably harmless for this output, confirm before tightening.
350; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
351; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
352; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
353; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
354; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
355
356; EG: LDS_READ_RET
357; EG: LDS_READ_RET
358; EG: LDS_READ_RET
359; EG: LDS_READ_RET
360; EG: LDS_READ_RET
361; EG: LDS_READ_RET
362; EG: LDS_READ_RET
363; EG: LDS_READ_RET
364; EG: LDS_READ_RET
365; EG: LDS_READ_RET
366; EG: LDS_READ_RET
367; EG: LDS_READ_RET
368; EG: LDS_READ_RET
369; EG: LDS_READ_RET
370; EG: LDS_READ_RET
371; EG: LDS_READ_RET
372define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
373  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
374  %ext = zext <32 x i16> %load to <32 x i32>
375  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
376  ret void
377}
378
; <32 x i16> loaded from addrspace(3), sext to <32 x i32>, stored to
; addrspace(3).
; GCN checks form a single unordered group: four ds_read2_b64 (offset pairs
; 0/1, 2/3, 4/5, 6/7) and eight ds_write2_b64 (offset pairs 0/1 through
; 14/15). EG checks expect sixteen LDS_READ_RET; no BFE_INT is checked here.
379; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
380; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
381; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
382; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
383; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
384; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
385; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
386; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
387; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
388; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
389; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
390; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
391; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
392
393; EG: LDS_READ_RET
394; EG: LDS_READ_RET
395; EG: LDS_READ_RET
396; EG: LDS_READ_RET
397; EG: LDS_READ_RET
398; EG: LDS_READ_RET
399; EG: LDS_READ_RET
400; EG: LDS_READ_RET
401; EG: LDS_READ_RET
402; EG: LDS_READ_RET
403; EG: LDS_READ_RET
404; EG: LDS_READ_RET
405; EG: LDS_READ_RET
406; EG: LDS_READ_RET
407; EG: LDS_READ_RET
408; EG: LDS_READ_RET
409define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
410  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
411  %ext = sext <32 x i16> %load to <32 x i32>
412  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
413  ret void
414}
415
; <64 x i16> loaded from addrspace(3), zext to <64 x i32>, stored to
; addrspace(3).
; GCN checks form a single unordered group: eight ds_read2_b64 (offset pairs
; 0/1 through 14/15) and sixteen ds_write2_b64 (offset pairs 0/1 through
; 30/31). EG checks expect thirty-two LDS_READ_RET.
416; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
417; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
418; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
419; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
420; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
421; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
422; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
423; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
424; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
425; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
426; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
427; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
428; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
429; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
430; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
431; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
432; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
433; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
434; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
435; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
436; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
437; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
438; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
439; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
440; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
441
442; EG: LDS_READ_RET
443; EG: LDS_READ_RET
444; EG: LDS_READ_RET
445; EG: LDS_READ_RET
446; EG: LDS_READ_RET
447; EG: LDS_READ_RET
448; EG: LDS_READ_RET
449; EG: LDS_READ_RET
450; EG: LDS_READ_RET
451; EG: LDS_READ_RET
452; EG: LDS_READ_RET
453; EG: LDS_READ_RET
454; EG: LDS_READ_RET
455; EG: LDS_READ_RET
456; EG: LDS_READ_RET
457; EG: LDS_READ_RET
458; EG: LDS_READ_RET
459; EG: LDS_READ_RET
460; EG: LDS_READ_RET
461; EG: LDS_READ_RET
462; EG: LDS_READ_RET
463; EG: LDS_READ_RET
464; EG: LDS_READ_RET
465; EG: LDS_READ_RET
466; EG: LDS_READ_RET
467; EG: LDS_READ_RET
468; EG: LDS_READ_RET
469; EG: LDS_READ_RET
470; EG: LDS_READ_RET
471; EG: LDS_READ_RET
472; EG: LDS_READ_RET
473; EG: LDS_READ_RET
474define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
475  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
476  %ext = zext <64 x i16> %load to <64 x i32>
477  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
478  ret void
479}
480
; <64 x i16> loaded from addrspace(3), sext to <64 x i32>, stored to
; addrspace(3).
; Only EG checks are present: thirty-two LDS_READ_RET. For the GCN runs the
; test only verifies that the function label is emitted.
; NOTE(review): the zext counterpart has full GCN read/write checks while this
; one has none - presumably intentional; confirm.
481; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
482
483; EG: LDS_READ_RET
484; EG: LDS_READ_RET
485; EG: LDS_READ_RET
486; EG: LDS_READ_RET
487; EG: LDS_READ_RET
488; EG: LDS_READ_RET
489; EG: LDS_READ_RET
490; EG: LDS_READ_RET
491; EG: LDS_READ_RET
492; EG: LDS_READ_RET
493; EG: LDS_READ_RET
494; EG: LDS_READ_RET
495; EG: LDS_READ_RET
496; EG: LDS_READ_RET
497; EG: LDS_READ_RET
498; EG: LDS_READ_RET
499; EG: LDS_READ_RET
500; EG: LDS_READ_RET
501; EG: LDS_READ_RET
502; EG: LDS_READ_RET
503; EG: LDS_READ_RET
504; EG: LDS_READ_RET
505; EG: LDS_READ_RET
506; EG: LDS_READ_RET
507; EG: LDS_READ_RET
508; EG: LDS_READ_RET
509; EG: LDS_READ_RET
510; EG: LDS_READ_RET
511; EG: LDS_READ_RET
512; EG: LDS_READ_RET
513; EG: LDS_READ_RET
514; EG: LDS_READ_RET
515define void @local_sextload_v64i16_to_v64i32(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
516  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
517  %ext = sext <64 x i16> %load to <64 x i32>
518  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
519  ret void
520}
521
; i16 loaded from addrspace(3), zext to i64, stored to addrspace(3).
; GCN checks: the low half comes from ds_read_u16 and the high half is a
; register set to 0 by v_mov_b32_e32; the pair [LO:HI] is stored with a single
; ds_write_b64.
; EG checks: LDS_USHORT_READ_RET, then two LDS_WRITE - an unconstrained one
; and one that stores the register copied from OQAP at the KC0[2].Y address.
522; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
523; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
524; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
525
526; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
527
528; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
529; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
530; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
531; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
532; EG-DAG: LDS_WRITE
533; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
534define void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
535  %a = load i16, i16 addrspace(3)* %in
536  %ext = zext i16 %a to i64
537  store i64 %ext, i64 addrspace(3)* %out
538  ret void
539}
540
; i16 loaded from addrspace(3), sext to i64, stored to addrspace(3).
; The two GCN runs are checked differently for the low half: the SI prefix
; expects ds_read_i16 directly into LO, while the VI prefix expects
; ds_read_u16 followed by v_bfe_i32 (offset 0, width 16) producing LO. Both
; then expect HI = LO arithmetically shifted right by 31 (v_ashrrev_i32_e32)
; and a single ds_write_b64 of [LO:HI].
; EG checks: LDS_USHORT_READ_RET, BFE_INT with a literal operand, the literal
; 16, an unconstrained LDS_WRITE, and finally LDS_WRITE of the BFE_INT result
; at the KC0[2].Y address. [[TMP]] is captured but not referenced afterwards.
; The FIXME below quotes a SelectionDAG dump illustrating the any-extending
; load + sign_extend_inreg sequence it wants optimized.
541; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
542; FIXME: Need to optimize this sequence to avoid an extra shift.
543;  t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
544;          t28: i64 = any_extend t25
545;        t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
546; SI: ds_read_i16 v[[LO:[0-9]+]],
547; VI: ds_read_u16 v[[ULO:[0-9]+]]
548; VI: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
549; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
550
551; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
552
553; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
554; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
555; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
556; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
557; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
558; EG-DAG: LDS_WRITE
559; EG-DAG: 16
560; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
561define void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
562  %a = load i16, i16 addrspace(3)* %in
563  %ext = sext i16 %a to i64
564  store i64 %ext, i64 addrspace(3)* %out
565  ret void
566}
567
; <1 x i16> loaded from addrspace(3), zext to <1 x i64>, stored to
; addrspace(3).
; Only EG checks are present: LDS_USHORT_READ_RET, then two LDS_WRITE - an
; unconstrained one and one storing the OQAP copy at the KC0[2].Y address.
; The GCN runs only verify the function label.
568; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
569
570; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
571; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
572; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
573; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
574; EG-DAG: LDS_WRITE
575; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
576define void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
577  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
578  %ext = zext <1 x i16> %load to <1 x i64>
579  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
580  ret void
581}
582
; <1 x i16> loaded from addrspace(3), sext to <1 x i64>, stored to
; addrspace(3).
; Only EG checks are present: LDS_USHORT_READ_RET, BFE_INT with a literal
; operand, the literal 16, an unconstrained LDS_WRITE, and LDS_WRITE of the
; BFE_INT result at the KC0[2].Y address. [[TMP]] is captured but unused.
; The GCN runs only verify the function label.
583; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
584
585; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
586; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
587; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
588; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
589; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
590; EG-DAG: LDS_WRITE
591; EG-DAG: 16
592; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
593define void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
594  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
595  %ext = sext <1 x i16> %load to <1 x i64>
596  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
597  ret void
598}
599
; <2 x i16> loaded from addrspace(3), zext to <2 x i64>, stored to
; addrspace(3).
; Only an EG check is present (one LDS_READ_RET); the GCN runs only verify the
; function label.
600; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
601
602; EG: LDS_READ_RET
603define void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
604  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
605  %ext = zext <2 x i16> %load to <2 x i64>
606  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
607  ret void
608}
609
; <2 x i16> loaded from addrspace(3), sext to <2 x i64>, stored to
; addrspace(3).
; Only EG checks are present: one LDS_READ_RET, then at least one BFE_INT and
; one ASHR in either order. The GCN runs only verify the function label.
610; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
611
612; EG: LDS_READ_RET
613; EG-DAG: BFE_INT
614; EG-DAG: ASHR
615define void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
616  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
617  %ext = sext <2 x i16> %load to <2 x i64>
618  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
619  ret void
620}
621
; <4 x i16> loaded from addrspace(3), zext to <4 x i64>, stored to
; addrspace(3).
; Only EG checks are present (two LDS_READ_RET); the GCN runs only verify the
; function label.
622; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
623
624; EG: LDS_READ_RET
625; EG: LDS_READ_RET
626define void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
627  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
628  %ext = zext <4 x i16> %load to <4 x i64>
629  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
630  ret void
631}
632
; <4 x i16> loaded from addrspace(3), sext to <4 x i64>, stored to
; addrspace(3).
; Only EG checks are present: two LDS_READ_RET, then two BFE_INT and two ASHR
; in any order. The GCN runs only verify the function label.
633; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
634
635; EG: LDS_READ_RET
636; EG: LDS_READ_RET
637; EG-DAG: BFE_INT
638; EG-DAG: BFE_INT
639; EG-DAG: ASHR
640; EG-DAG: ASHR
641define void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
642  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
643  %ext = sext <4 x i16> %load to <4 x i64>
644  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
645  ret void
646}
647
; <8 x i16> loaded from addrspace(3), zext to <8 x i64>, stored to
; addrspace(3).
; Only EG checks are present (four LDS_READ_RET); the GCN runs only verify the
; function label.
648; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
649
650; EG: LDS_READ_RET
651; EG: LDS_READ_RET
652; EG: LDS_READ_RET
653; EG: LDS_READ_RET
654define void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
655  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
656  %ext = zext <8 x i16> %load to <8 x i64>
657  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
658  ret void
659}
660
; <8 x i16> loaded from addrspace(3), sext to <8 x i64>, stored to
; addrspace(3).
; Only EG checks are present: four LDS_READ_RET, then four BFE_INT and four
; ASHR in any order. The GCN runs only verify the function label.
661; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
662
663; EG: LDS_READ_RET
664; EG: LDS_READ_RET
665; EG: LDS_READ_RET
666; EG: LDS_READ_RET
667; EG-DAG: BFE_INT
668; EG-DAG: BFE_INT
669; EG-DAG: ASHR
670; EG-DAG: ASHR
671; EG-DAG: BFE_INT
672; EG-DAG: BFE_INT
673; EG-DAG: ASHR
674; EG-DAG: ASHR
675define void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
676  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
677  %ext = sext <8 x i16> %load to <8 x i64>
678  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
679  ret void
680}
681
; <16 x i16> loaded from addrspace(3), zext to <16 x i64>, stored to
; addrspace(3).
; Only EG checks are present (eight LDS_READ_RET); the GCN runs only verify
; the function label.
682; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
683
684; EG: LDS_READ_RET
685; EG: LDS_READ_RET
686; EG: LDS_READ_RET
687; EG: LDS_READ_RET
688; EG: LDS_READ_RET
689; EG: LDS_READ_RET
690; EG: LDS_READ_RET
691; EG: LDS_READ_RET
692define void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
693  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
694  %ext = zext <16 x i16> %load to <16 x i64>
695  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
696  ret void
697}
698
; <16 x i16> loaded from addrspace(3), sext to <16 x i64>, stored to
; addrspace(3).
; Only EG checks are present: eight LDS_READ_RET, then eight BFE_INT and eight
; ASHR in any order. The GCN runs only verify the function label.
699; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
700
701; EG: LDS_READ_RET
702; EG: LDS_READ_RET
703; EG: LDS_READ_RET
704; EG: LDS_READ_RET
705; EG: LDS_READ_RET
706; EG: LDS_READ_RET
707; EG: LDS_READ_RET
708; EG: LDS_READ_RET
709; EG-DAG: BFE_INT
710; EG-DAG: BFE_INT
711; EG-DAG: ASHR
712; EG-DAG: ASHR
713; EG-DAG: BFE_INT
714; EG-DAG: BFE_INT
715; EG-DAG: ASHR
716; EG-DAG: ASHR
717; EG-DAG: BFE_INT
718; EG-DAG: BFE_INT
719; EG-DAG: ASHR
720; EG-DAG: ASHR
721; EG-DAG: BFE_INT
722; EG-DAG: BFE_INT
723; EG-DAG: ASHR
724; EG-DAG: ASHR
725define void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
726  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
727  %ext = sext <16 x i16> %load to <16 x i64>
728  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
729  ret void
730}
731
; <32 x i16> loaded from addrspace(3), zext to <32 x i64>, stored to
; addrspace(3).
; Only EG checks are present (sixteen LDS_READ_RET); the GCN runs only verify
; the function label.
732; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
733
734; EG: LDS_READ_RET
735; EG: LDS_READ_RET
736; EG: LDS_READ_RET
737; EG: LDS_READ_RET
738; EG: LDS_READ_RET
739; EG: LDS_READ_RET
740; EG: LDS_READ_RET
741; EG: LDS_READ_RET
742; EG: LDS_READ_RET
743; EG: LDS_READ_RET
744; EG: LDS_READ_RET
745; EG: LDS_READ_RET
746; EG: LDS_READ_RET
747; EG: LDS_READ_RET
748; EG: LDS_READ_RET
749; EG: LDS_READ_RET
750define void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
751  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
752  %ext = zext <32 x i16> %load to <32 x i64>
753  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
754  ret void
755}
756
; <32 x i16> loaded from addrspace(3), sext to <32 x i64>, stored to
; addrspace(3).
; Only EG checks are present: sixteen LDS_READ_RET, then sixteen BFE_INT and
; sixteen ASHR in any order. The GCN runs only verify the function label.
757; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
758
759; EG: LDS_READ_RET
760; EG: LDS_READ_RET
761; EG: LDS_READ_RET
762; EG: LDS_READ_RET
763; EG: LDS_READ_RET
764; EG: LDS_READ_RET
765; EG: LDS_READ_RET
766; EG: LDS_READ_RET
767; EG: LDS_READ_RET
768; EG: LDS_READ_RET
769; EG: LDS_READ_RET
770; EG: LDS_READ_RET
771; EG: LDS_READ_RET
772; EG: LDS_READ_RET
773; EG: LDS_READ_RET
774; EG: LDS_READ_RET
775; EG-DAG: BFE_INT
776; EG-DAG: BFE_INT
777; EG-DAG: ASHR
778; EG-DAG: ASHR
779; EG-DAG: BFE_INT
780; EG-DAG: BFE_INT
781; EG-DAG: ASHR
782; EG-DAG: ASHR
783; EG-DAG: BFE_INT
784; EG-DAG: BFE_INT
785; EG-DAG: ASHR
786; EG-DAG: ASHR
787; EG-DAG: BFE_INT
788; EG-DAG: BFE_INT
789; EG-DAG: ASHR
790; EG-DAG: ASHR
791; EG-DAG: BFE_INT
792; EG-DAG: BFE_INT
793; EG-DAG: ASHR
794; EG-DAG: ASHR
795; EG-DAG: BFE_INT
796; EG-DAG: BFE_INT
797; EG-DAG: ASHR
798; EG-DAG: ASHR
799; EG-DAG: BFE_INT
800; EG-DAG: BFE_INT
801; EG-DAG: ASHR
802; EG-DAG: ASHR
803; EG-DAG: BFE_INT
804; EG-DAG: BFE_INT
805; EG-DAG: ASHR
806; EG-DAG: ASHR
807define void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
808  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
809  %ext = sext <32 x i16> %load to <32 x i64>
810  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
811  ret void
812}
813
814; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
815; define void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
816;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
817;   %ext = zext <64 x i16> %load to <64 x i64>
818;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
819;   ret void
820; }
821
822; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
823; define void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
824;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
825;   %ext = sext <64 x i16> %load to <64 x i64>
826;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
827;   ret void
828; }
829
; Attribute group #0, referenced by every function above whose definition ends
; in "#0"; it contains only nounwind.
830attributes #0 = { nounwind }
831