1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
4; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
5; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
6
; Scalar case: load one i16 through the addrspace(4) pointer %in and store it
; unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions below pin the llc output for the four RUN
; configurations at the top of the file: the three amdgcn runs expect a
; ushort load followed by a short store, and the r600 run expects a
; VTX_READ_16 followed by a MEM_RAT MSKOR write preceded by and/shift ALU ops.
7define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
8; GCN-NOHSA-SI-LABEL: constant_load_i16:
9; GCN-NOHSA-SI:       ; %bb.0: ; %entry
10; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
11; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
12; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
13; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
14; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
15; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
16; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
17; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
18; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
19; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
20; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
21; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
22; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
23; GCN-NOHSA-SI-NEXT:    s_endpgm
24;
25; GCN-HSA-LABEL: constant_load_i16:
26; GCN-HSA:       ; %bb.0: ; %entry
27; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
28; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
29; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
30; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
31; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
32; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
33; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
34; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
35; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
36; GCN-HSA-NEXT:    s_endpgm
37;
38; GCN-NOHSA-VI-LABEL: constant_load_i16:
39; GCN-NOHSA-VI:       ; %bb.0: ; %entry
40; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
41; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
42; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
43; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
44; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
45; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
46; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
47; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
48; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
49; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
50; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
51; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
52; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
53; GCN-NOHSA-VI-NEXT:    s_endpgm
54;
55; EG-LABEL: constant_load_i16:
56; EG:       ; %bb.0: ; %entry
57; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
58; EG-NEXT:    TEX 0 @6
59; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
60; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
61; EG-NEXT:    CF_END
62; EG-NEXT:    PAD
63; EG-NEXT:    Fetch clause starting at 6:
64; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
65; EG-NEXT:    ALU clause starting at 8:
66; EG-NEXT:     MOV * T0.X, KC0[2].Z,
67; EG-NEXT:    ALU clause starting at 9:
68; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
69; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
70; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
71; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
72; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
73; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
74; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
75; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
76; EG-NEXT:     MOV T0.Y, 0.0,
77; EG-NEXT:     MOV * T0.Z, 0.0,
78; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
79; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
80entry:
81  %ld = load i16, i16 addrspace(4)* %in
82  store i16 %ld, i16 addrspace(1)* %out
83  ret void
84}
85
; <2 x i16> case: load the 32-bit vector through the addrspace(4) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect the whole vector to move as a single
; dword: a scalar s_load_dword plus one dword store on the amdgcn runs, and
; a VTX_READ_32 plus one STORE_RAW on the r600 run.
86define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
87; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
88; GCN-NOHSA-SI:       ; %bb.0: ; %entry
89; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
90; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
91; GCN-NOHSA-SI-NEXT:    s_load_dword s4, s[2:3], 0x0
92; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
93; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
94; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
95; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
96; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
97; GCN-NOHSA-SI-NEXT:    s_endpgm
98;
99; GCN-HSA-LABEL: constant_load_v2i16:
100; GCN-HSA:       ; %bb.0: ; %entry
101; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
102; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
103; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
104; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
105; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
106; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
107; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
108; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
109; GCN-HSA-NEXT:    s_endpgm
110;
111; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
112; GCN-NOHSA-VI:       ; %bb.0: ; %entry
113; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
114; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
115; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
116; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
117; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
118; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
119; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
120; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
121; GCN-NOHSA-VI-NEXT:    s_endpgm
122;
123; EG-LABEL: constant_load_v2i16:
124; EG:       ; %bb.0: ; %entry
125; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
126; EG-NEXT:    TEX 0 @6
127; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
128; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
129; EG-NEXT:    CF_END
130; EG-NEXT:    PAD
131; EG-NEXT:    Fetch clause starting at 6:
132; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
133; EG-NEXT:    ALU clause starting at 8:
134; EG-NEXT:     MOV * T0.X, KC0[2].Z,
135; EG-NEXT:    ALU clause starting at 9:
136; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
137; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
138entry:
139  %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
140  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
141  ret void
142}
143
; <3 x i16> case (odd element count): load the vector through the
; addrspace(4) pointer %in and store it unchanged through the addrspace(1)
; pointer %out.
; The autogenerated assertions expect the amdgcn runs to fetch with one
; s_load_dwordx2 and to split the store into a short at byte offset 4 plus a
; dword at offset 0. The r600 run expects three VTX_READ_16 fetches, one
; STORE_RAW and one MEM_RAT MSKOR write.
144define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
145; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
146; GCN-NOHSA-SI:       ; %bb.0: ; %entry
147; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
148; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
149; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
150; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
151; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
152; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
154; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
155; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
157; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
158; GCN-NOHSA-SI-NEXT:    s_endpgm
159;
160; GCN-HSA-LABEL: constant_load_v3i16:
161; GCN-HSA:       ; %bb.0: ; %entry
162; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
163; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
164; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
165; GCN-HSA-NEXT:    s_add_u32 s4, s0, 4
166; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
167; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
168; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
169; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
170; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
171; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s3
172; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
173; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
174; GCN-HSA-NEXT:    flat_store_short v[2:3], v4
175; GCN-HSA-NEXT:    flat_store_dword v[0:1], v5
176; GCN-HSA-NEXT:    s_endpgm
177;
178; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
179; GCN-NOHSA-VI:       ; %bb.0: ; %entry
180; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
181; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
182; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
183; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
184; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
185; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
186; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
187; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s4
188; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
189; GCN-NOHSA-VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
190; GCN-NOHSA-VI-NEXT:    s_endpgm
191;
192; EG-LABEL: constant_load_v3i16:
193; EG:       ; %bb.0: ; %entry
194; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
195; EG-NEXT:    TEX 2 @6
196; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
197; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
198; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
199; EG-NEXT:    CF_END
200; EG-NEXT:    Fetch clause starting at 6:
201; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
202; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
203; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
204; EG-NEXT:    ALU clause starting at 12:
205; EG-NEXT:     MOV * T5.X, KC0[2].Z,
206; EG-NEXT:    ALU clause starting at 13:
207; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
208; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
209; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
210; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
211; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
212; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
213; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
214; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
215; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
216; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
217; EG-NEXT:     MOV T5.Y, 0.0,
218; EG-NEXT:     MOV * T5.Z, 0.0,
219; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
220; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
221; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
222; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
223; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
224; EG-NEXT:     OR_INT T6.X, PV.W, PS,
225; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
226; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
227entry:
228  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
229  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
230  ret void
231}
232
; <4 x i16> case: load the 64-bit vector through the addrspace(4) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect one s_load_dwordx2 and one dwordx2
; store on the amdgcn runs, and one VTX_READ_64 plus one STORE_RAW of T0.XY
; on the r600 run.
233define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
234; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
235; GCN-NOHSA-SI:       ; %bb.0: ; %entry
236; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
237; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
238; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
239; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
240; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
241; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
242; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
243; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
244; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
245; GCN-NOHSA-SI-NEXT:    s_endpgm
246;
247; GCN-HSA-LABEL: constant_load_v4i16:
248; GCN-HSA:       ; %bb.0: ; %entry
249; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
250; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
251; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
252; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
253; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
254; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
255; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
256; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
257; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
258; GCN-HSA-NEXT:    s_endpgm
259;
260; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
261; GCN-NOHSA-VI:       ; %bb.0: ; %entry
262; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
263; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
264; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
265; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
266; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
267; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
268; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
269; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
270; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
271; GCN-NOHSA-VI-NEXT:    s_endpgm
272;
273; EG-LABEL: constant_load_v4i16:
274; EG:       ; %bb.0: ; %entry
275; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
276; EG-NEXT:    TEX 0 @6
277; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
278; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
279; EG-NEXT:    CF_END
280; EG-NEXT:    PAD
281; EG-NEXT:    Fetch clause starting at 6:
282; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
283; EG-NEXT:    ALU clause starting at 8:
284; EG-NEXT:     MOV * T0.X, KC0[2].Z,
285; EG-NEXT:    ALU clause starting at 9:
286; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
287; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
288entry:
289  %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
290  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
291  ret void
292}
293
; <8 x i16> case: load the 128-bit vector through the addrspace(4) pointer
; %in and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect one s_load_dwordx4 and one dwordx4
; store on the amdgcn runs, and one VTX_READ_128 plus one STORE_RAW of
; T0.XYZW on the r600 run.
294define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
295; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
296; GCN-NOHSA-SI:       ; %bb.0: ; %entry
297; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
298; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
299; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
300; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
301; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
302; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
303; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
304; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
305; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
306; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
307; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
308; GCN-NOHSA-SI-NEXT:    s_endpgm
309;
310; GCN-HSA-LABEL: constant_load_v8i16:
311; GCN-HSA:       ; %bb.0: ; %entry
312; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
313; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
314; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
315; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
316; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
317; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
318; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
319; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
320; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
321; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
322; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
323; GCN-HSA-NEXT:    s_endpgm
324;
325; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
326; GCN-NOHSA-VI:       ; %bb.0: ; %entry
327; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
328; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
329; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
330; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
331; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
332; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
333; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
334; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
335; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
336; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
337; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
338; GCN-NOHSA-VI-NEXT:    s_endpgm
339;
340; EG-LABEL: constant_load_v8i16:
341; EG:       ; %bb.0: ; %entry
342; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
343; EG-NEXT:    TEX 0 @6
344; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
345; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
346; EG-NEXT:    CF_END
347; EG-NEXT:    PAD
348; EG-NEXT:    Fetch clause starting at 6:
349; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
350; EG-NEXT:    ALU clause starting at 8:
351; EG-NEXT:     MOV * T0.X, KC0[2].Z,
352; EG-NEXT:    ALU clause starting at 9:
353; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
354; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
355entry:
356  %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
357  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
358  ret void
359}
360
; <16 x i16> case: load the 256-bit vector through the addrspace(4) pointer
; %in and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect one s_load_dwordx8 on the amdgcn runs,
; written back as two dwordx4 stores (byte offset 16 and offset 0). The r600
; run expects two VTX_READ_128 fetches and two STORE_RAW writes.
361define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
362; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
363; GCN-NOHSA-SI:       ; %bb.0: ; %entry
364; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
365; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
366; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
367; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
368; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
369; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
370; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
371; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
372; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
373; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
374; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
375; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
376; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
377; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s1
378; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s2
379; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s3
380; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
381; GCN-NOHSA-SI-NEXT:    s_endpgm
382;
383; GCN-HSA-LABEL: constant_load_v16i16:
384; GCN-HSA:       ; %bb.0: ; %entry
385; GCN-HSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
386; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
387; GCN-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
388; GCN-HSA-NEXT:    s_add_u32 s10, s8, 16
389; GCN-HSA-NEXT:    s_addc_u32 s11, s9, 0
390; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s10
391; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s11
392; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
393; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
394; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
395; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
396; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
397; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
398; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
399; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
400; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
401; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
402; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
403; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
404; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
405; GCN-HSA-NEXT:    s_endpgm
406;
407; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
408; GCN-NOHSA-VI:       ; %bb.0: ; %entry
409; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
410; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
411; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
412; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
413; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
414; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
415; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
418; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
419; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
420; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
422; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
423; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
424; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0
425; GCN-NOHSA-VI-NEXT:    s_endpgm
426;
427; EG-LABEL: constant_load_v16i16:
428; EG:       ; %bb.0: ; %entry
429; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
430; EG-NEXT:    TEX 0 @8
431; EG-NEXT:    ALU 3, @13, KC0[CB0:0-32], KC1[]
432; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
433; EG-NEXT:    ALU 1, @17, KC0[CB0:0-32], KC1[]
434; EG-NEXT:    TEX 0 @10
435; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
436; EG-NEXT:    CF_END
437; EG-NEXT:    Fetch clause starting at 8:
438; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
439; EG-NEXT:    Fetch clause starting at 10:
440; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
441; EG-NEXT:    ALU clause starting at 12:
442; EG-NEXT:     MOV * T0.X, KC0[2].Z,
443; EG-NEXT:    ALU clause starting at 13:
444; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
445; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
446; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
447; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
448; EG-NEXT:    ALU clause starting at 17:
449; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
450; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
451entry:
452  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
453  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
454  ret void
455}
456
; Under-aligned <16 x i16> case: the load through the addrspace(4) pointer
; %ptr0 is marked `align 2`, and the result is stored with `align 32` to an
; undef addrspace(1) pointer, so only the load side is meaningful.
; The autogenerated assertions expect the two non-HSA amdgcn runs to issue
; sixteen buffer_load_ushort instructions and rebuild each dword with
; v_lshlrev_b32 / v_or_b32. The amdhsa run and the r600 run instead expect
; two 128-bit loads (flat_load_dwordx4 / VTX_READ_128).
; NOTE(review): attribute group #0 is defined outside this view; its effect
; on the expected output is not verifiable from here.
457define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
458; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
459; GCN-NOHSA-SI:       ; %bb.0: ; %entry
460; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
461; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
462; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
463; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
464; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
465; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
466; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
467; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
468; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
469; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
470; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
471; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
472; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
473; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
474; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
475; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
476; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
477; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
478; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
479; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
480; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
481; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
482; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
483; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
484; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
485; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
486; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
487; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
488; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
489; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
490; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
491; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
492; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
493; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
494; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
495; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
496; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
497; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
498; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
499; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
500; GCN-NOHSA-SI-NEXT:    s_endpgm
501;
502; GCN-HSA-LABEL: constant_load_v16i16_align2:
503; GCN-HSA:       ; %bb.0: ; %entry
504; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
505; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
506; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
507; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
508; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
509; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
510; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
511; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
512; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
513; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
514; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
515; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[0:3]
516; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
517; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
518; GCN-HSA-NEXT:    s_endpgm
519;
520; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
521; GCN-NOHSA-VI:       ; %bb.0: ; %entry
522; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
523; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
524; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
525; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
526; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:14
527; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:10
528; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
529; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:2
530; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:30
531; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:26
532; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:22
533; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:18
534; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:12
535; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:8
536; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:4
537; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0
538; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:28
539; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:24
540; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:20
541; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:16
542; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
543; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
544; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
545; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(13)
546; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v2
547; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
548; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v3
549; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(11)
550; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
551; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
552; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
553; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(9)
554; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v6
555; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
556; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v19, 16, v7
557; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(7)
558; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v0, v8
559; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
560; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v1, v9
561; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
562; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v16, v10
563; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
564; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v17, v11
565; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
566; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v4, v12
567; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
568; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v5, v13
569; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
570; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v18, v14
571; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
572; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v19, v15
573; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
574; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
575; GCN-NOHSA-VI-NEXT:    s_endpgm
576;
577; EG-LABEL: constant_load_v16i16_align2:
578; EG:       ; %bb.0: ; %entry
579; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
580; EG-NEXT:    TEX 0 @8
581; EG-NEXT:    ALU 1, @13, KC0[], KC1[]
582; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
583; EG-NEXT:    TEX 0 @10
584; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
585; EG-NEXT:    CF_END
586; EG-NEXT:    PAD
587; EG-NEXT:    Fetch clause starting at 8:
588; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
589; EG-NEXT:    Fetch clause starting at 10:
590; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
591; EG-NEXT:    ALU clause starting at 12:
592; EG-NEXT:     MOV * T0.X, KC0[2].Y,
593; EG-NEXT:    ALU clause starting at 13:
594; EG-NEXT:     MOV * T2.X, literal.x,
595; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
596entry:
597  %ld =  load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
598  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
599  ret void
600}
601
; Zero-extending load: load an i16 through the addrspace(4) pointer %in,
; zext it to i32, and store the i32 through the addrspace(1) pointer %out.
; The autogenerated assertions expect the extension to need no separate
; instruction: the amdgcn runs go straight from a ushort load to a dword
; store, and the r600 run goes from VTX_READ_16 to a STORE_RAW.
; This function has no named entry block, so the block comment in the
; expected output is just `%bb.0:`.
602define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
603; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
604; GCN-NOHSA-SI:       ; %bb.0:
605; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
606; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
607; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
608; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
609; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
610; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
611; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
612; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
613; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
615; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
616; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
617; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
618; GCN-NOHSA-SI-NEXT:    s_endpgm
619;
620; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
621; GCN-HSA:       ; %bb.0:
622; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
623; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
624; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
625; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
626; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
627; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
628; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
629; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
630; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
631; GCN-HSA-NEXT:    s_endpgm
632;
633; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
634; GCN-NOHSA-VI:       ; %bb.0:
635; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
636; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
637; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
638; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
639; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
640; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
641; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
642; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
643; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
644; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
645; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
646; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
647; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
648; GCN-NOHSA-VI-NEXT:    s_endpgm
649;
650; EG-LABEL: constant_zextload_i16_to_i32:
651; EG:       ; %bb.0:
652; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
653; EG-NEXT:    TEX 0 @6
654; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
655; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
656; EG-NEXT:    CF_END
657; EG-NEXT:    PAD
658; EG-NEXT:    Fetch clause starting at 6:
659; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
660; EG-NEXT:    ALU clause starting at 8:
661; EG-NEXT:     MOV * T0.X, KC0[2].Z,
662; EG-NEXT:    ALU clause starting at 9:
663; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
664; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
665  %a = load i16, i16 addrspace(4)* %in
666  %ext = zext i16 %a to i32
667  store i32 %ext, i32 addrspace(1)* %out
668  ret void
669}
670
; Sign-extending load: load an i16 through the addrspace(4) pointer %in,
; sext it to i32, and store the i32 through the addrspace(1) pointer %out.
; The autogenerated assertions expect the amdgcn runs to use the signed
; short load (buffer_load_sshort / flat_load_sshort) directly followed by a
; dword store. The r600 run expects VTX_READ_16 followed by a BFE_INT whose
; literal operand is 16.
; This function has no named entry block, so the block comment in the
; expected output is just `%bb.0:`.
671define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
672; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
673; GCN-NOHSA-SI:       ; %bb.0:
674; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
675; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
676; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
677; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
678; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
679; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
680; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
681; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
682; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
683; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
684; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
685; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
686; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
687; GCN-NOHSA-SI-NEXT:    s_endpgm
688;
689; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
690; GCN-HSA:       ; %bb.0:
691; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
692; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
693; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
694; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
695; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
696; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
697; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
698; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
699; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
700; GCN-HSA-NEXT:    s_endpgm
701;
702; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
703; GCN-NOHSA-VI:       ; %bb.0:
704; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
705; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
706; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
707; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
708; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
709; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
710; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
711; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
712; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
713; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
714; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
715; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
716; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
717; GCN-NOHSA-VI-NEXT:    s_endpgm
718;
719; EG-LABEL: constant_sextload_i16_to_i32:
720; EG:       ; %bb.0:
721; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
722; EG-NEXT:    TEX 0 @6
723; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
724; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
725; EG-NEXT:    CF_END
726; EG-NEXT:    PAD
727; EG-NEXT:    Fetch clause starting at 6:
728; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
729; EG-NEXT:    ALU clause starting at 8:
730; EG-NEXT:     MOV * T0.X, KC0[2].Z,
731; EG-NEXT:    ALU clause starting at 9:
732; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
733; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
734; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
735  %a = load i16, i16 addrspace(4)* %in
736  %ext = sext i16 %a to i32
737  store i32 %ext, i32 addrspace(1)* %out
738  ret void
739}
740
; Single-element vector zero-extending load: reads a <1 x i16> through %in
; (addrspace(4)), zero-extends it to <1 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect an unsigned 16-bit load (buffer_load_ushort or
; flat_load_ushort) followed directly by a dword store; the EG checks expect a
; VTX_READ_16 with no separate masking instruction on the loaded value.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
741define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
742; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
743; GCN-NOHSA-SI:       ; %bb.0:
744; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
745; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
746; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
747; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
748; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
749; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
750; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
751; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
752; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
753; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
754; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
755; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
756; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
757; GCN-NOHSA-SI-NEXT:    s_endpgm
758;
759; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
760; GCN-HSA:       ; %bb.0:
761; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
762; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
763; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
764; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
765; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
766; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
767; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
768; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
769; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
770; GCN-HSA-NEXT:    s_endpgm
771;
772; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
773; GCN-NOHSA-VI:       ; %bb.0:
774; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
775; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
776; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
777; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
778; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
779; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
780; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
781; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
782; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
783; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
784; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
785; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
786; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
787; GCN-NOHSA-VI-NEXT:    s_endpgm
788;
789; EG-LABEL: constant_zextload_v1i16_to_v1i32:
790; EG:       ; %bb.0:
791; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
792; EG-NEXT:    TEX 0 @6
793; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
794; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
795; EG-NEXT:    CF_END
796; EG-NEXT:    PAD
797; EG-NEXT:    Fetch clause starting at 6:
798; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
799; EG-NEXT:    ALU clause starting at 8:
800; EG-NEXT:     MOV * T0.X, KC0[2].Z,
801; EG-NEXT:    ALU clause starting at 9:
802; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
803; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
804  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
805  %ext = zext <1 x i16> %load to <1 x i32>
806  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
807  ret void
808}
809
; Single-element vector sign-extending load: reads a <1 x i16> through %in
; (addrspace(4)), sign-extends it to <1 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect a signed 16-bit load (buffer_load_sshort or
; flat_load_sshort) followed directly by a dword store; the EG checks expect a
; VTX_READ_16 followed by a BFE_INT with a width literal of 16.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
810define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
811; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
812; GCN-NOHSA-SI:       ; %bb.0:
813; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
814; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
815; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
816; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
817; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
818; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
819; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
820; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
821; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
822; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
823; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
824; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
825; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
826; GCN-NOHSA-SI-NEXT:    s_endpgm
827;
828; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
829; GCN-HSA:       ; %bb.0:
830; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
831; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
832; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
833; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
834; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
835; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
836; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
837; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
838; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
839; GCN-HSA-NEXT:    s_endpgm
840;
841; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32:
842; GCN-NOHSA-VI:       ; %bb.0:
843; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
844; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
845; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
846; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
847; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
848; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
849; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
850; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
851; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
852; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
853; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
854; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
855; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
856; GCN-NOHSA-VI-NEXT:    s_endpgm
857;
858; EG-LABEL: constant_sextload_v1i16_to_v1i32:
859; EG:       ; %bb.0:
860; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
861; EG-NEXT:    TEX 0 @6
862; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
863; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
864; EG-NEXT:    CF_END
865; EG-NEXT:    PAD
866; EG-NEXT:    Fetch clause starting at 6:
867; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
868; EG-NEXT:    ALU clause starting at 8:
869; EG-NEXT:     MOV * T0.X, KC0[2].Z,
870; EG-NEXT:    ALU clause starting at 9:
871; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
872; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
873; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
874  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
875  %ext = sext <1 x i16> %load to <1 x i32>
876  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
877  ret void
878}
879
; Two-element vector zero-extending load: reads a <2 x i16> through %in
; (addrspace(4)), zero-extends it to <2 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one scalar s_load_dword whose halves are separated
; with s_lshr_b32 by 16 and s_and_b32 with 0xffff, then a dwordx2 store.
; The EG checks expect a VTX_READ_32 split with LSHR by 16 and AND_INT with
; 65535.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
880define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
881; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
882; GCN-NOHSA-SI:       ; %bb.0:
883; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
884; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
885; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
886; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
887; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
888; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
889; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
890; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
891; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
892; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
893; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
894; GCN-NOHSA-SI-NEXT:    s_endpgm
895;
896; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32:
897; GCN-HSA:       ; %bb.0:
898; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
899; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
900; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
901; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
902; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
903; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
904; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
905; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
906; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
907; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
908; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
909; GCN-HSA-NEXT:    s_endpgm
910;
911; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32:
912; GCN-NOHSA-VI:       ; %bb.0:
913; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
914; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
915; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
916; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
917; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
918; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
919; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s4, 16
920; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
921; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
922; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
923; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
924; GCN-NOHSA-VI-NEXT:    s_endpgm
925;
926; EG-LABEL: constant_zextload_v2i16_to_v2i32:
927; EG:       ; %bb.0:
928; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
929; EG-NEXT:    TEX 0 @6
930; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
931; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
932; EG-NEXT:    CF_END
933; EG-NEXT:    PAD
934; EG-NEXT:    Fetch clause starting at 6:
935; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
936; EG-NEXT:    ALU clause starting at 8:
937; EG-NEXT:     MOV * T4.X, KC0[2].Z,
938; EG-NEXT:    ALU clause starting at 9:
939; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
940; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
941; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
942; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
943; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
944  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
945  %ext = zext <2 x i16> %load to <2 x i32>
946  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
947  ret void
948}
949
950; TODO: We should use ASHR instead of LSHR + BFE
; Two-element vector sign-extending load: reads a <2 x i16> through %in
; (addrspace(4)), sign-extends it to <2 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one scalar s_load_dword; the upper half is extended
; with s_ashr_i32 by 16 and the lower half with s_sext_i32_i16, then a dwordx2
; store. The EG checks currently expect a BFE_INT for the lower half and an
; LSHR followed by BFE_INT for the upper half.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
951define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
952; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
953; GCN-NOHSA-SI:       ; %bb.0:
954; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
955; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
956; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
957; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
958; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
959; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s4, s2, 16
960; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s2
961; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
962; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
963; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
964; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
965; GCN-NOHSA-SI-NEXT:    s_endpgm
966;
967; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32:
968; GCN-HSA:       ; %bb.0:
969; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
970; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
971; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
972; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
973; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
974; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
975; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
976; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s2
977; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
978; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
979; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
980; GCN-HSA-NEXT:    s_endpgm
981;
982; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32:
983; GCN-NOHSA-VI:       ; %bb.0:
984; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
985; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
986; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
987; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
988; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
989; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
990; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s5, s4, 16
991; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
993; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
994; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
995; GCN-NOHSA-VI-NEXT:    s_endpgm
996;
997; EG-LABEL: constant_sextload_v2i16_to_v2i32:
998; EG:       ; %bb.0:
999; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1000; EG-NEXT:    TEX 0 @6
1001; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1002; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1003; EG-NEXT:    CF_END
1004; EG-NEXT:    PAD
1005; EG-NEXT:    Fetch clause starting at 6:
1006; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1007; EG-NEXT:    ALU clause starting at 8:
1008; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1009; EG-NEXT:    ALU clause starting at 9:
1010; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1011; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1012; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1013; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1014; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1015; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1016  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
1017  %ext = sext <2 x i16> %load to <2 x i32>
1018  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1019  ret void
1020}
1021
; Three-element vector zero-extending load: reads a <3 x i16> through %in
; (addrspace(4)), zero-extends it to <3 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2, with elements separated by
; s_lshr_b32 by 16 and s_and_b32 against a 0xffff mask held in a register.
; The SI checks expect the result written as a dword store at offset 8 plus a
; dwordx2 store, while the HSA and VI checks expect a single dwordx3 store.
; The EG checks expect three separate VTX_READ_16 fetches and two stores.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1022define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1023; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32:
1024; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1025; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1026; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1027; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1028; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1029; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1030; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, 0xffff
1031; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1032; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1033; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s6
1034; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s6
1035; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1036; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1037; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1039; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1040; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1041; GCN-NOHSA-SI-NEXT:    s_endpgm
1042;
1043; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32:
1044; GCN-HSA:       ; %bb.0: ; %entry
1045; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1046; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
1047; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1048; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1049; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1050; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1051; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1052; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
1053; GCN-HSA-NEXT:    s_and_b32 s1, s3, s4
1054; GCN-HSA-NEXT:    s_and_b32 s2, s2, s4
1055; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1056; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1057; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1058; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1059; GCN-HSA-NEXT:    s_endpgm
1060;
1061; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32:
1062; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1063; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1064; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
1065; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1066; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1067; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1068; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1069; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1070; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1071; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1072; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s3, s8
1073; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
1074; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, s8
1075; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1076; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1077; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
1078; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1079; GCN-NOHSA-VI-NEXT:    s_endpgm
1080;
1081; EG-LABEL: constant_zextload_v3i16_to_v3i32:
1082; EG:       ; %bb.0: ; %entry
1083; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1084; EG-NEXT:    TEX 2 @6
1085; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1086; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1087; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1088; EG-NEXT:    CF_END
1089; EG-NEXT:    Fetch clause starting at 6:
1090; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1091; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1092; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1093; EG-NEXT:    ALU clause starting at 12:
1094; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1095; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1096; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1097; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1098; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1099; EG-NEXT:    ALU clause starting at 17:
1100; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1101; EG-NEXT:     MOV * T3.Y, T1.X,
1102; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1103entry:
1104  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1105  %ext = zext <3 x i16> %ld to <3 x i32>
1106  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1107  ret void
1108}
1109
; Three-element vector sign-extending load: reads a <3 x i16> through %in
; (addrspace(4)), sign-extends it to <3 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2, with elements extended by
; s_ashr_i32 by 16 and s_sext_i32_i16.
; The SI checks expect the result written as a dword store at offset 8 plus a
; dwordx2 store, while the HSA and VI checks expect a single dwordx3 store.
; The EG checks expect three VTX_READ_16 fetches, each followed by a BFE_INT
; with a width literal of 16.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1110define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1111; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32:
1112; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1113; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1114; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1115; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1116; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1117; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1118; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1119; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s6, s4, 16
1120; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1121; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1122; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1123; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1124; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1125; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1126; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1127; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1128; GCN-NOHSA-SI-NEXT:    s_endpgm
1129;
1130; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32:
1131; GCN-HSA:       ; %bb.0: ; %entry
1132; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1133; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1134; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1135; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1136; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1137; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1138; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
1139; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1140; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1141; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1142; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1143; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1144; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1145; GCN-HSA-NEXT:    s_endpgm
1146;
1147; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32:
1148; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1149; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1150; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1151; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1152; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1153; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1154; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1155; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s4, 16
1156; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1157; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1158; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1159; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s6
1160; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1161; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1162; GCN-NOHSA-VI-NEXT:    s_endpgm
1163;
1164; EG-LABEL: constant_sextload_v3i16_to_v3i32:
1165; EG:       ; %bb.0: ; %entry
1166; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1167; EG-NEXT:    TEX 2 @6
1168; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1169; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1170; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1171; EG-NEXT:    CF_END
1172; EG-NEXT:    Fetch clause starting at 6:
1173; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1174; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1175; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1176; EG-NEXT:    ALU clause starting at 12:
1177; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1178; EG-NEXT:    ALU clause starting at 13:
1179; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1180; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1181; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1182; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1183; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1184; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1186; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1187; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1188; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1189entry:
1190  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1191  %ext = sext <3 x i16> %ld to <3 x i32>
1192  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1193  ret void
1194}
1195
1196; v4i16 is naturally 8 byte aligned
1197; TODO: This should use LD, but for some reason there are redundant MOVs
; Four-element vector zero-extending load: reads a <4 x i16> through %in
; (addrspace(4)), zero-extends it to <4 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2; each loaded dword is split with
; s_lshr_b32 by 16 and s_and_b32 against a 0xffff mask held in a register,
; then written with a single dwordx4 store.
; The EG checks expect a VTX_READ_64 whose values pass through MOV copies
; before being split with LSHR by 16 and AND_INT with 65535.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1198define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1199; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
1200; GCN-NOHSA-SI:       ; %bb.0:
1201; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1202; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1203; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1204; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1205; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
1206; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1207; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
1208; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1209; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s2
1210; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s2
1211; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1212; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1213; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1214; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1215; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1216; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1217; GCN-NOHSA-SI-NEXT:    s_endpgm
1218;
1219; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32:
1220; GCN-HSA:       ; %bb.0:
1221; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1222; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
1223; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1224; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1225; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1226; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1227; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1228; GCN-HSA-NEXT:    s_lshr_b32 s0, s3, 16
1229; GCN-HSA-NEXT:    s_lshr_b32 s1, s2, 16
1230; GCN-HSA-NEXT:    s_and_b32 s3, s3, s4
1231; GCN-HSA-NEXT:    s_and_b32 s2, s2, s4
1232; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1233; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1234; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
1235; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1236; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1237; GCN-HSA-NEXT:    s_endpgm
1238;
1239; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32:
1240; GCN-NOHSA-VI:       ; %bb.0:
1241; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1242; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
1243; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1244; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1245; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1246; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1247; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1248; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1249; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1250; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s3, 16
1251; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s3, s8
1252; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s3, s2, 16
1253; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, s8
1254; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1255; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
1256; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1257; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1258; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1259; GCN-NOHSA-VI-NEXT:    s_endpgm
1260;
1261; EG-LABEL: constant_zextload_v4i16_to_v4i32:
1262; EG:       ; %bb.0:
1263; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1264; EG-NEXT:    TEX 0 @6
1265; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
1266; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1267; EG-NEXT:    CF_END
1268; EG-NEXT:    PAD
1269; EG-NEXT:    Fetch clause starting at 6:
1270; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1271; EG-NEXT:    ALU clause starting at 8:
1272; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1273; EG-NEXT:    ALU clause starting at 9:
1274; EG-NEXT:     MOV T2.X, T5.X,
1275; EG-NEXT:     MOV * T3.X, T5.Y,
1276; EG-NEXT:     MOV T0.Y, PV.X,
1277; EG-NEXT:     MOV * T0.Z, PS,
1278; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1279; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1280; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1281; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1282; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1283; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1284; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
1285; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1286; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1287  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1288  %ext = zext <4 x i16> %load to <4 x i32>
1289  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1290  ret void
1291}
1292
1293; v4i16 is naturally 8 byte aligned
1294; TODO: This should use LD, but for some reason there are redundant MOVs
1295; TODO: We should use ASHR instead of LSHR + BFE
; Four-element vector sign-extending load: reads a <4 x i16> through %in
; (addrspace(4)), sign-extends it to <4 x i32> and stores it through %out
; (addrspace(1)).
; The GCN checks expect one s_load_dwordx2 and a single dwordx4 store. The
; lower half of each dword is extended with s_sext_i32_i16. For the upper
; halves, the SI and HSA checks expect an s_ashr_i32 by 16 plus an s_ashr_i64
; by 48 on the loaded register pair, while the VI checks expect two s_ashr_i32
; by 16.
; The EG checks expect a VTX_READ_64 whose values pass through MOV copies and
; are then extended with BFE_INT, preceded by an LSHR for the upper halves.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
1296define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1297; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
1298; GCN-NOHSA-SI:       ; %bb.0:
1299; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1300; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1301; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1302; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1303; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1304; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s4, 16
1305; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[4:5], 48
1306; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1307; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1308; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1309; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1310; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
1311; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1312; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1313; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1314; GCN-NOHSA-SI-NEXT:    s_endpgm
1315;
1316; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32:
1317; GCN-HSA:       ; %bb.0:
1318; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1319; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1320; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1321; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1322; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1323; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1324; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[2:3], 48
1325; GCN-HSA-NEXT:    s_ashr_i32 s4, s2, 16
1326; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1327; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1328; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1329; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
1330; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1331; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1332; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1333; GCN-HSA-NEXT:    s_endpgm
1334;
1335; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32:
1336; GCN-NOHSA-VI:       ; %bb.0:
1337; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1338; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1339; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1340; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1341; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1342; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1343; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s5, 16
1344; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s7, s4, 16
1345; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1346; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1349; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s6
1351; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1352; GCN-NOHSA-VI-NEXT:    s_endpgm
1353;
1354; EG-LABEL: constant_sextload_v4i16_to_v4i32:
1355; EG:       ; %bb.0:
1356; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1357; EG-NEXT:    TEX 0 @6
1358; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1359; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1360; EG-NEXT:    CF_END
1361; EG-NEXT:    PAD
1362; EG-NEXT:    Fetch clause starting at 6:
1363; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1364; EG-NEXT:    ALU clause starting at 8:
1365; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1366; EG-NEXT:    ALU clause starting at 9:
1367; EG-NEXT:     MOV T2.X, T5.X,
1368; EG-NEXT:     MOV * T3.X, T5.Y,
1369; EG-NEXT:     MOV T0.Y, PV.X,
1370; EG-NEXT:     MOV * T0.Z, PS,
1371; EG-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1372; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1373; EG-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1374; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1375; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1376; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1377; EG-NEXT:     LSHR * T0.W, T0.Y, literal.x,
1378; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1379; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1380; EG-NEXT:     BFE_INT * T5.Y, PS, 0.0, literal.y,
1381; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1382  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1383  %ext = sext <4 x i16> %load to <4 x i32>
1384  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1385  ret void
1386}
1387
1388; v8i16 is naturally 16 byte aligned
1389; TODO: These should use LSHR instead of BFE_UINT
1390; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <8 x i16> through the addrspace(4) pointer %in, zero-extend every
; lane to i32, and store the <8 x i32> result through the addrspace(1) pointer
; %out.
;
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file), one group per RUN-line prefix: GCN-NOHSA-SI, GCN-HSA,
; GCN-NOHSA-VI and EG. Regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; Expected shape of the output, as pinned by the checks:
;  * GCN prefixes: one s_load_dwordx4 of the source data, then for each loaded
;    dword an s_lshr_b32 by 16 (high half) and an s_and_b32 with 0xffff (low
;    half); the result is written with two dwordx4 stores.
;  * EG: one VTX_READ_128, LSHR by 16 / AND_INT with 65535 per dword, and two
;    MEM_RAT_CACHELESS STORE_RAW writes.
1391define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1392; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
1393; GCN-NOHSA-SI:       ; %bb.0:
1394; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1395; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1396; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1397; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1398; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1399; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, 0xffff
1400; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1401; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s5, 16
1402; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s4, 16
1403; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s7, 16
1404; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
1405; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s8
1406; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s8
1407; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s8
1408; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s8
1409; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1410; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s12
1411; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1412; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
1413; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1414; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1415; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1416; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s10
1417; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1418; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
1419; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1420; GCN-NOHSA-SI-NEXT:    s_endpgm
1421;
1422; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32:
1423; GCN-HSA:       ; %bb.0:
1424; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1425; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1426; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1427; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
1428; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1429; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
1430; GCN-HSA-NEXT:    s_lshr_b32 s9, s4, 16
1431; GCN-HSA-NEXT:    s_lshr_b32 s3, s7, 16
1432; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
1433; GCN-HSA-NEXT:    s_and_b32 s5, s5, s2
1434; GCN-HSA-NEXT:    s_and_b32 s4, s4, s2
1435; GCN-HSA-NEXT:    s_and_b32 s7, s7, s2
1436; GCN-HSA-NEXT:    s_and_b32 s2, s6, s2
1437; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1438; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1439; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
1440; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1441; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1442; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s10
1443; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1444; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1445; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1446; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1447; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1448; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1449; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1450; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1451; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1452; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1453; GCN-HSA-NEXT:    s_endpgm
1454;
1455; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32:
1456; GCN-NOHSA-VI:       ; %bb.0:
1457; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1458; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1459; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1460; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1461; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x0
1462; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, 0xffff
1463; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1464; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1465; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1466; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s9, 16
1467; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s9, s2
1468; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s3, s8, 16
1469; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s2
1470; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s11, 16
1471; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s2
1472; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s10, 16
1473; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s10, s2
1474; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1475; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s12
1476; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
1477; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
1478; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
1479; GCN-NOHSA-VI-NEXT:    s_nop 0
1480; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
1481; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
1482; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1483; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1484; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1485; GCN-NOHSA-VI-NEXT:    s_endpgm
1486;
1487; EG-LABEL: constant_zextload_v8i16_to_v8i32:
1488; EG:       ; %bb.0:
1489; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1490; EG-NEXT:    TEX 0 @6
1491; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1492; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1493; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1494; EG-NEXT:    CF_END
1495; EG-NEXT:    Fetch clause starting at 6:
1496; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1497; EG-NEXT:    ALU clause starting at 8:
1498; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1499; EG-NEXT:    ALU clause starting at 9:
1500; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1501; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1502; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1503; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1504; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1505; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1506; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1507; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1508; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1509; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1510; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1511; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1512; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1513; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1514; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1515; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1516; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1517; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: vector load, lane-wise zext i16 -> i32, vector store.
1518  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1519  %ext = zext <8 x i16> %load to <8 x i32>
1520  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1521  ret void
1522}
1523
1524; v8i16 is naturally 16-byte aligned
1525; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
1526; TODO: This should use DST, but for some reason there are redundant MOVs
; Test: load <8 x i16> through the addrspace(4) pointer %in, sign-extend every
; lane to i32, and store the <8 x i32> result through the addrspace(1) pointer
; %out.
;
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file), one group per RUN-line prefix: GCN-NOHSA-SI, GCN-HSA,
; GCN-NOHSA-VI and EG. Regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; Expected shape of the output, as pinned by the checks:
;  * GCN prefixes: one s_load_dwordx4 of the source data, then for each loaded
;    dword an s_ashr_i32 by 16 (high half) and an s_sext_i32_i16 (low half);
;    the result is written with two dwordx4 stores.
;  * EG: one VTX_READ_128; low halves go through BFE_INT directly, high halves
;    through LSHR by 16 followed by BFE_INT; two MEM_RAT_CACHELESS STORE_RAW
;    writes.
1527define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1528; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
1529; GCN-NOHSA-SI:       ; %bb.0:
1530; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1531; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1532; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1533; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1534; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1535; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1536; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s5, 16
1537; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s9, s4, 16
1538; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1539; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s10, s7, 16
1540; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s11, s6, 16
1541; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1542; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1543; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1544; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1545; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1546; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1548; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1549; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1550; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1553; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1554; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1555; GCN-NOHSA-SI-NEXT:    s_endpgm
1556;
1557; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32:
1558; GCN-HSA:       ; %bb.0:
1559; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1560; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1561; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1562; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1563; GCN-HSA-NEXT:    s_ashr_i32 s8, s5, 16
1564; GCN-HSA-NEXT:    s_ashr_i32 s9, s4, 16
1565; GCN-HSA-NEXT:    s_ashr_i32 s2, s7, 16
1566; GCN-HSA-NEXT:    s_ashr_i32 s3, s6, 16
1567; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1568; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1569; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1570; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1571; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1572; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1573; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1574; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1575; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1576; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1577; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1578; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1579; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1580; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1581; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1582; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1583; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1584; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1585; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1586; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1587; GCN-HSA-NEXT:    s_endpgm
1588;
1589; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32:
1590; GCN-NOHSA-VI:       ; %bb.0:
1591; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1592; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1593; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1594; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1595; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1596; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1597; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s10, s7, 16
1598; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s11, s6, 16
1599; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
1600; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
1601; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s8, s5, 16
1602; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s9, s4, 16
1603; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1604; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1605; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1606; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
1607; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1608; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s10
1609; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1610; GCN-NOHSA-VI-NEXT:    s_nop 0
1611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1613; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1614; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1615; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1616; GCN-NOHSA-VI-NEXT:    s_endpgm
1617;
1618; EG-LABEL: constant_sextload_v8i16_to_v8i32:
1619; EG:       ; %bb.0:
1620; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1621; EG-NEXT:    TEX 0 @6
1622; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
1623; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1624; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1625; EG-NEXT:    CF_END
1626; EG-NEXT:    Fetch clause starting at 6:
1627; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1628; EG-NEXT:    ALU clause starting at 8:
1629; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1630; EG-NEXT:    ALU clause starting at 9:
1631; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
1632; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1633; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
1634; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
1635; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
1636; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1637; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
1638; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
1639; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
1640; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1641; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1642; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
1643; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
1644; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
1645; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
1646; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1647; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1648; EG-NEXT:     LSHR T10.X, PS, literal.x,
1649; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
1650; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
  ; IR under test: vector load, lane-wise sext i16 -> i32, vector store.
1651  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1652  %ext = sext <8 x i16> %load to <8 x i32>
1653  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1654  ret void
1655}
1656
; Test: load <16 x i16> through the addrspace(4) pointer %in, zero-extend every
; lane to i32, and store the <16 x i32> result through the addrspace(1) pointer
; %out.
;
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file), one group per RUN-line prefix: GCN-NOHSA-SI, GCN-HSA,
; GCN-NOHSA-VI and EG. Regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; Expected shape of the output, as pinned by the checks:
;  * GCN prefixes: one s_load_dwordx8 of the source data, then for each loaded
;    dword an s_lshr_b32 by 16 (high half) and an s_and_b32 with 0xffff (low
;    half); the result is written with four dwordx4 stores covering byte
;    offsets 48, 32, 16 and 0 (the HSA variant forms those addresses with
;    s_add_u32/s_addc_u32 instead of a store offset).
;  * EG: two VTX_READ_128 fetches, LSHR by 16 / AND_INT with 65535 per dword,
;    and four MEM_RAT_CACHELESS STORE_RAW writes.
1657define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1658; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
1659; GCN-NOHSA-SI:       ; %bb.0:
1660; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1661; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1662; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1663; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1664; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1665; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, 0xffff
1666; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1667; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s5, 16
1668; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s4, 16
1669; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s7, 16
1670; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s6, 16
1671; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s9, 16
1672; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s8, 16
1673; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s11, 16
1674; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s10, 16
1675; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s12
1676; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s12
1677; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s12
1678; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s12
1679; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s12
1680; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s12
1681; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s12
1682; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s12
1683; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
1684; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s20
1685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
1686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
1687; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1688; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1689; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
1690; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s18
1691; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
1692; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s17
1693; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1694; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1695; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1696; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s16
1697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
1699; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1700; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1701; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1702; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s14
1703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
1705; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1706; GCN-NOHSA-SI-NEXT:    s_endpgm
1707;
1708; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
1709; GCN-HSA:       ; %bb.0:
1710; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1711; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1712; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1713; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
1714; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1715; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
1716; GCN-HSA-NEXT:    s_lshr_b32 s13, s4, 16
1717; GCN-HSA-NEXT:    s_lshr_b32 s14, s7, 16
1718; GCN-HSA-NEXT:    s_lshr_b32 s15, s6, 16
1719; GCN-HSA-NEXT:    s_lshr_b32 s16, s9, 16
1720; GCN-HSA-NEXT:    s_lshr_b32 s17, s8, 16
1721; GCN-HSA-NEXT:    s_lshr_b32 s3, s11, 16
1722; GCN-HSA-NEXT:    s_lshr_b32 s18, s10, 16
1723; GCN-HSA-NEXT:    s_and_b32 s5, s5, s2
1724; GCN-HSA-NEXT:    s_and_b32 s4, s4, s2
1725; GCN-HSA-NEXT:    s_and_b32 s7, s7, s2
1726; GCN-HSA-NEXT:    s_and_b32 s6, s6, s2
1727; GCN-HSA-NEXT:    s_and_b32 s9, s9, s2
1728; GCN-HSA-NEXT:    s_and_b32 s8, s8, s2
1729; GCN-HSA-NEXT:    s_and_b32 s11, s11, s2
1730; GCN-HSA-NEXT:    s_and_b32 s2, s10, s2
1731; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1732; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1733; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
1734; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1735; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1736; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1737; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1738; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s18
1739; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1740; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1741; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1742; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1743; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1744; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1745; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1746; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
1747; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1748; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
1749; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1750; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1751; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1752; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1753; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
1754; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1755; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
1756; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1757; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1758; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1759; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1760; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
1761; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1762; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
1763; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1764; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1765; GCN-HSA-NEXT:    s_endpgm
1766;
1767; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32:
1768; GCN-NOHSA-VI:       ; %bb.0:
1769; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x24
1770; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1771; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1772; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1773; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[14:15], 0x0
1774; GCN-NOHSA-VI-NEXT:    s_mov_b32 s14, 0xffff
1775; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s12
1776; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s13
1777; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1778; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s11, 16
1779; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s14
1780; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s10, 16
1781; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, s14
1782; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s9, 16
1783; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, s14
1784; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s8, 16
1785; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s14
1786; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
1787; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s20
1788; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
1789; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s19
1790; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s7, 16
1791; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s14
1792; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s6, 16
1793; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s14
1794; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1795; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
1796; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
1797; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s18
1798; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
1799; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
1800; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s14
1801; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s4, 16
1802; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s14
1803; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1804; GCN-NOHSA-VI-NEXT:    s_nop 0
1805; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1806; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s16
1807; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1808; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
1809; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1810; GCN-NOHSA-VI-NEXT:    s_nop 0
1811; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1812; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
1813; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1814; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
1815; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1816; GCN-NOHSA-VI-NEXT:    s_endpgm
1817;
1818; EG-LABEL: constant_zextload_v16i16_to_v16i32:
1819; EG:       ; %bb.0:
1820; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1821; EG-NEXT:    TEX 1 @8
1822; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
1823; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
1824; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
1825; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
1826; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
1827; EG-NEXT:    CF_END
1828; EG-NEXT:    Fetch clause starting at 8:
1829; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
1830; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
1831; EG-NEXT:    ALU clause starting at 12:
1832; EG-NEXT:     MOV * T11.X, KC0[2].Z,
1833; EG-NEXT:    ALU clause starting at 13:
1834; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
1835; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1836; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
1837; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1838; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
1839; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
1840; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1841; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
1842; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
1843; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
1844; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1845; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
1846; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
1847; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1848; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
1849; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
1850; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1851; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1852; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
1853; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
1854; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
1855; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
1856; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1857; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1858; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
1859; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1860; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
1861; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
1862; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
1863; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
1864; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1865; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1866; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1867; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
1868; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
1869; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: vector load, lane-wise zext i16 -> i32, vector store.
1870  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
1871  %ext = zext <16 x i16> %load to <16 x i32>
1872  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
1873  ret void
1874}
1875
1876define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1877; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
1878; GCN-NOHSA-SI:       ; %bb.0:
1879; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1880; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1881; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1882; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1883; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1884; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1885; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s12, s5, 16
1886; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s13, s4, 16
1887; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1888; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1889; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s14, s7, 16
1890; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s15, s6, 16
1891; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1892; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1893; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s16, s9, 16
1894; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s17, s8, 16
1895; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
1896; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s11, 16
1897; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s10, 16
1898; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
1899; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
1900; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
1901; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
1902; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
1903; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
1904; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
1905; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1906; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1907; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
1908; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
1909; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
1910; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
1911; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1912; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1913; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1914; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
1915; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1916; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
1917; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1918; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1919; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1920; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
1921; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1922; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
1923; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1924; GCN-NOHSA-SI-NEXT:    s_endpgm
1925;
1926; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
1927; GCN-HSA:       ; %bb.0:
1928; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1929; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1930; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1931; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1932; GCN-HSA-NEXT:    s_ashr_i32 s12, s5, 16
1933; GCN-HSA-NEXT:    s_ashr_i32 s13, s4, 16
1934; GCN-HSA-NEXT:    s_ashr_i32 s14, s7, 16
1935; GCN-HSA-NEXT:    s_ashr_i32 s15, s6, 16
1936; GCN-HSA-NEXT:    s_ashr_i32 s16, s9, 16
1937; GCN-HSA-NEXT:    s_ashr_i32 s17, s8, 16
1938; GCN-HSA-NEXT:    s_ashr_i32 s2, s11, 16
1939; GCN-HSA-NEXT:    s_ashr_i32 s3, s10, 16
1940; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1941; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1942; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1943; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1944; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1945; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
1946; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
1947; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1948; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1949; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
1950; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1951; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1952; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1953; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1954; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
1955; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
1956; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1957; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1958; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1959; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
1960; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1961; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
1962; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1963; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1964; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1965; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1966; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1967; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1968; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
1969; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1970; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
1971; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1972; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1973; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1974; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1975; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1976; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1977; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
1978; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1979; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
1980; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1981; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1982; GCN-HSA-NEXT:    s_endpgm
1983;
1984; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32:
1985; GCN-NOHSA-VI:       ; %bb.0:
1986; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1987; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1988; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1989; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1990; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1991; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1992; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s18, s11, 16
1993; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s19, s10, 16
1994; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
1995; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
1996; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s16, s9, 16
1997; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s17, s8, 16
1998; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
1999; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
2002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2003; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
2004; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s14, s7, 16
2005; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s15, s6, 16
2006; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2007; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2008; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2009; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s12, s5, 16
2010; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2011; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
2012; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
2014; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s13, s4, 16
2015; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2016; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2017; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2018; GCN-NOHSA-VI-NEXT:    s_nop 0
2019; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2020; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2021; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
2023; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2024; GCN-NOHSA-VI-NEXT:    s_nop 0
2025; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2026; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
2027; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2028; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
2029; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2030; GCN-NOHSA-VI-NEXT:    s_endpgm
2031;
2032; EG-LABEL: constant_sextload_v16i16_to_v16i32:
2033; EG:       ; %bb.0:
2034; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2035; EG-NEXT:    TEX 1 @8
2036; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2037; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2038; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2039; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2040; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2041; EG-NEXT:    CF_END
2042; EG-NEXT:    Fetch clause starting at 8:
2043; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2044; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2045; EG-NEXT:    ALU clause starting at 12:
2046; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2047; EG-NEXT:    ALU clause starting at 13:
2048; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2049; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2050; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2051; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2052; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2053; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2054; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2055; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2056; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2057; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2058; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2059; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2060; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2061; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2062; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2063; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2064; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2065; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2066; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2067; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2068; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2069; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2070; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2071; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2072; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2073; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2074; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2075; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2076; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2077; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2078; EG-NEXT:     LSHR T11.X, PS, literal.x,
2079; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2080; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2081; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2082; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2083; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2084; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2085; EG-NEXT:     LSHR T12.X, PS, literal.x,
2086; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2087; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2088  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
2089  %ext = sext <16 x i16> %load to <16 x i32>
2090  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2091  ret void
2092}
2093
2094define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
; Test kernel: reads a <32 x i16> vector through the addrspace(4) pointer %in,
; zero-extends every element to i32 and writes the <32 x i32> result through
; the addrspace(1) pointer %out (the IR body follows the check lines).
; The check lines below are autogenerated; the NOTE at the top of this file
; names utils/update_llc_test_checks.py, so regenerate them rather than
; editing them by hand.
2095; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
2096; GCN-NOHSA-SI:       ; %bb.0:
2097; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2098; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2099; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2100; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, 0xffff
2101; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2102; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s1, 16
2103; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s0, 16
2104; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s3, 16
2105; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s2, 16
2106; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s5, 16
2107; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s4, 16
2108; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s7, 16
2109; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s6, 16
2110; GCN-NOHSA-SI-NEXT:    s_and_b32 s27, s1, s18
2111; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s0, s18
2112; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s3, s18
2113; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s2, s18
2114; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s18
2115; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s18
2116; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s18
2117; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s18
2118; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s9, s18
2119; GCN-NOHSA-SI-NEXT:    s_and_b32 s33, s8, s18
2120; GCN-NOHSA-SI-NEXT:    s_and_b32 s34, s11, s18
2121; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s10, s18
2122; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s13, s18
2123; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s12, s18
2124; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s15, s18
2125; GCN-NOHSA-SI-NEXT:    s_and_b32 s18, s14, s18
2126; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s9, 16
2127; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s8, 16
2128; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s11, 16
2129; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s10, 16
2130; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s13, 16
2131; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s12, 16
2132; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s15, 16
2133; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s14, 16
2134; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2135; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2137; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2138; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
2139; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s14
2140; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s38
2141; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
2142; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2143; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2144; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s37
2145; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s12
2146; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s36
2147; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
2148; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2149; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s35
2151; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s10
2152; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s34
2153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
2154; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2155; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s33
2157; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
2158; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
2159; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
2160; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2161; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2162; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2163; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s26
2164; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2165; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
2166; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2167; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2168; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2169; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s24
2170; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s23
2172; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2173; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2174; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
2175; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s22
2176; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
2177; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
2178; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2179; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2180; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
2181; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s20
2182; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
2183; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
2184; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2185; GCN-NOHSA-SI-NEXT:    s_endpgm
2186;
2187; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
2188; GCN-HSA:       ; %bb.0:
2189; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
2190; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2191; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2192; GCN-HSA-NEXT:    s_mov_b32 s18, 0xffff
2193; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2194; GCN-HSA-NEXT:    s_and_b32 s19, s1, s18
2195; GCN-HSA-NEXT:    s_and_b32 s20, s0, s18
2196; GCN-HSA-NEXT:    s_and_b32 s21, s3, s18
2197; GCN-HSA-NEXT:    s_and_b32 s22, s2, s18
2198; GCN-HSA-NEXT:    s_and_b32 s23, s5, s18
2199; GCN-HSA-NEXT:    s_and_b32 s24, s4, s18
2200; GCN-HSA-NEXT:    s_and_b32 s25, s7, s18
2201; GCN-HSA-NEXT:    s_and_b32 s26, s6, s18
2202; GCN-HSA-NEXT:    s_and_b32 s27, s9, s18
2203; GCN-HSA-NEXT:    s_and_b32 s28, s8, s18
2204; GCN-HSA-NEXT:    s_and_b32 s29, s11, s18
2205; GCN-HSA-NEXT:    s_and_b32 s30, s10, s18
2206; GCN-HSA-NEXT:    s_and_b32 s31, s13, s18
2207; GCN-HSA-NEXT:    s_and_b32 s33, s12, s18
2208; GCN-HSA-NEXT:    s_and_b32 s34, s15, s18
2209; GCN-HSA-NEXT:    s_and_b32 s18, s14, s18
2210; GCN-HSA-NEXT:    s_lshr_b32 s35, s1, 16
2211; GCN-HSA-NEXT:    s_lshr_b32 s36, s0, 16
2212; GCN-HSA-NEXT:    s_lshr_b32 s3, s3, 16
2213; GCN-HSA-NEXT:    s_lshr_b32 s2, s2, 16
2214; GCN-HSA-NEXT:    s_lshr_b32 s5, s5, 16
2215; GCN-HSA-NEXT:    s_lshr_b32 s4, s4, 16
2216; GCN-HSA-NEXT:    s_lshr_b32 s7, s7, 16
2217; GCN-HSA-NEXT:    s_lshr_b32 s6, s6, 16
2218; GCN-HSA-NEXT:    s_lshr_b32 s9, s9, 16
2219; GCN-HSA-NEXT:    s_lshr_b32 s8, s8, 16
2220; GCN-HSA-NEXT:    s_lshr_b32 s11, s11, 16
2221; GCN-HSA-NEXT:    s_lshr_b32 s10, s10, 16
2222; GCN-HSA-NEXT:    s_lshr_b32 s13, s13, 16
2223; GCN-HSA-NEXT:    s_lshr_b32 s12, s12, 16
2224; GCN-HSA-NEXT:    s_lshr_b32 s15, s15, 16
2225; GCN-HSA-NEXT:    s_lshr_b32 s14, s14, 16
2226; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
2227; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2228; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
2229; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
2230; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
2231; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2232; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
2233; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
2234; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
2235; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
2236; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s14
2237; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s34
2238; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
2239; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s33
2240; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s12
2241; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2242; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s31
2243; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s13
2244; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2245; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2246; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
2247; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2248; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2249; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
2250; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s10
2251; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s29
2252; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
2253; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2254; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2255; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2256; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2257; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
2258; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
2259; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s8
2260; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
2261; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
2262; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2263; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2264; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2265; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2266; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
2267; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
2268; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s6
2269; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s25
2270; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
2271; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2272; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2273; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2274; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2275; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
2276; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
2277; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
2278; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s23
2279; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
2280; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2281; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2282; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2283; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
2284; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s2
2285; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s21
2286; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
2287; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2288; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2289; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2290; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
2291; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
2292; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
2293; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
2294; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
2295; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2296; GCN-HSA-NEXT:    s_endpgm
2297;
2298; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32:
2299; GCN-NOHSA-VI:       ; %bb.0:
2300; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[20:23], s[0:1], 0x24
2301; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
2302; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
2303; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2304; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[22:23], 0x0
2305; GCN-NOHSA-VI-NEXT:    s_mov_b32 s22, 0xffff
2306; GCN-NOHSA-VI-NEXT:    s_mov_b32 s16, s20
2307; GCN-NOHSA-VI-NEXT:    s_mov_b32 s17, s21
2308; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2309; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s15, 16
2310; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s15, s22
2311; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s37, s14, 16
2312; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, s22
2313; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s13, 16
2314; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, s22
2315; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s12, 16
2316; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, s22
2317; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2318; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
2319; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2320; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s36
2321; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s11, 16
2322; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, s22
2323; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s10, 16
2324; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, s22
2325; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
2326; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s9, 16
2327; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2328; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s35
2329; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2330; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s34
2331; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, s22
2332; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s8, 16
2333; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, s22
2334; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
2335; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s7, 16
2336; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2337; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s33
2338; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2339; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s31
2340; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s22
2341; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s6, 16
2342; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s22
2343; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
2344; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s5, 16
2345; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2346; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s30
2347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s29
2349; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s22
2350; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s4, 16
2351; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s22
2352; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
2353; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s3, 16
2354; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2355; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s28
2356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
2358; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, s22
2359; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s2, 16
2360; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, s22
2361; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
2362; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s1, 16
2363; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2364; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s26
2365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2366; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s25
2367; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s1, s22
2368; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s0, 16
2369; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s0, s22
2370; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
2371; GCN-NOHSA-VI-NEXT:    s_nop 0
2372; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
2373; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s24
2374; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s23
2376; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
2377; GCN-NOHSA-VI-NEXT:    s_nop 0
2378; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
2379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
2380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
2381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
2382; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
2383; GCN-NOHSA-VI-NEXT:    s_endpgm
2384;
2385; EG-LABEL: constant_zextload_v32i16_to_v32i32:
2386; EG:       ; %bb.0:
2387; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2388; EG-NEXT:    TEX 3 @12
2389; EG-NEXT:    ALU 71, @21, KC0[CB0:0-32], KC1[]
2390; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
2391; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2392; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
2393; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
2394; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
2395; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
2396; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
2397; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
2398; EG-NEXT:    CF_END
2399; EG-NEXT:    Fetch clause starting at 12:
2400; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2401; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2402; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2403; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2404; EG-NEXT:    ALU clause starting at 20:
2405; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2406; EG-NEXT:    ALU clause starting at 21:
2407; EG-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2408; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2409; EG-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2410; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2411; EG-NEXT:     LSHR T23.Y, T20.X, literal.x,
2412; EG-NEXT:     LSHR * T24.W, T20.W, literal.x,
2413; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2414; EG-NEXT:     AND_INT T23.X, T20.X, literal.x,
2415; EG-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2416; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.y,
2417; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2418; EG-NEXT:     LSHR T24.Y, T20.Z, literal.x,
2419; EG-NEXT:     LSHR * T25.W, T19.Y, literal.x,
2420; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2421; EG-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2422; EG-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2423; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2424; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2425; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2426; EG-NEXT:     LSHR T25.Y, T19.X, literal.y,
2427; EG-NEXT:     LSHR T27.W, T19.W, literal.y,
2428; EG-NEXT:     AND_INT * T25.X, T19.X, literal.z,
2429; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2430; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2431; EG-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2432; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2433; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2434; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
2435; EG-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2436; EG-NEXT:     LSHR T28.W, T22.Y, literal.y,
2437; EG-NEXT:     AND_INT * T27.X, T19.Z, literal.z,
2438; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2439; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2440; EG-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2441; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2442; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2443; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2444; EG-NEXT:     LSHR T28.Y, T22.X, literal.y,
2445; EG-NEXT:     LSHR T30.W, T22.W, literal.y,
2446; EG-NEXT:     AND_INT * T28.X, T22.X, literal.z,
2447; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2448; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2449; EG-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2450; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2451; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2452; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
2453; EG-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2454; EG-NEXT:     LSHR T31.W, T21.Y, literal.y,
2455; EG-NEXT:     AND_INT * T30.X, T22.Z, literal.z,
2456; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2457; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2458; EG-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
2459; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2460; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2461; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2462; EG-NEXT:     LSHR T31.Y, T21.X, literal.y,
2463; EG-NEXT:     LSHR T33.W, T21.W, literal.y,
2464; EG-NEXT:     AND_INT * T31.X, T21.X, literal.z,
2465; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2466; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2467; EG-NEXT:     AND_INT T33.Z, T21.W, literal.x,
2468; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2469; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2470; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2471; EG-NEXT:     LSHR T33.Y, T21.Z, literal.y,
2472; EG-NEXT:     AND_INT * T33.X, T21.Z, literal.z,
2473; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2474; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2475; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2476; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2477; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2478; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, one zext, one vector store.
; In the expected GCN output above, the widening shows up as a 0xffff mask
; (s_and_b32) for the low half of each loaded dword and a right shift by 16
; (s_lshr_b32) for the high half; the expected R600 output uses AND_INT with
; 65535 and LSHR by 16 instead. All four expected outputs write the result
; as eight four-dword stores covering byte offsets 0 through 112 from %out.
2479  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2480  %ext = zext <32 x i16> %load to <32 x i32>
2481  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2482  ret void
2483}
2484
2485define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
2486; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
2487; GCN-NOHSA-SI:       ; %bb.0:
2488; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2489; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2490; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2491; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2492; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s1, 16
2493; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s0, 16
2494; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s1
2495; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s0
2496; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s22, s3, 16
2497; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s2, 16
2498; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s3
2499; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s2
2500; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s26, s5, 16
2501; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s27, s4, 16
2502; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
2503; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
2504; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s28, s7, 16
2505; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s29, s6, 16
2506; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
2507; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
2508; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s30, s9, 16
2509; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s31, s8, 16
2510; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
2511; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
2512; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s11, 16
2513; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s10, 16
2514; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
2515; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
2516; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s13, 16
2517; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s36, s12, 16
2518; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
2519; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
2520; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s37, s15, 16
2521; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s14, 16
2522; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
2523; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
2524; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2525; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2526; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2527; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2528; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2529; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
2530; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2531; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
2532; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2533; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2534; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2535; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
2536; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2537; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
2538; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2539; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2540; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2541; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2542; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2543; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2544; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2545; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2546; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2549; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2550; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2551; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2553; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2556; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2557; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2558; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2559; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2560; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2561; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2562; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2563; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2564; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s25
2565; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
2566; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
2567; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
2568; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2569; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2570; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s21
2571; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2572; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
2573; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2574; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2575; GCN-NOHSA-SI-NEXT:    s_endpgm
2576;
2577; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
2578; GCN-HSA:       ; %bb.0:
2579; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2580; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2581; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2582; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2583; GCN-HSA-NEXT:    s_ashr_i32 s20, s5, 16
2584; GCN-HSA-NEXT:    s_ashr_i32 s21, s4, 16
2585; GCN-HSA-NEXT:    s_ashr_i32 s22, s7, 16
2586; GCN-HSA-NEXT:    s_ashr_i32 s23, s6, 16
2587; GCN-HSA-NEXT:    s_ashr_i32 s24, s9, 16
2588; GCN-HSA-NEXT:    s_ashr_i32 s25, s8, 16
2589; GCN-HSA-NEXT:    s_ashr_i32 s26, s11, 16
2590; GCN-HSA-NEXT:    s_ashr_i32 s27, s10, 16
2591; GCN-HSA-NEXT:    s_ashr_i32 s28, s13, 16
2592; GCN-HSA-NEXT:    s_ashr_i32 s29, s12, 16
2593; GCN-HSA-NEXT:    s_ashr_i32 s30, s15, 16
2594; GCN-HSA-NEXT:    s_ashr_i32 s31, s14, 16
2595; GCN-HSA-NEXT:    s_ashr_i32 s33, s17, 16
2596; GCN-HSA-NEXT:    s_ashr_i32 s34, s16, 16
2597; GCN-HSA-NEXT:    s_ashr_i32 s35, s19, 16
2598; GCN-HSA-NEXT:    s_ashr_i32 s36, s18, 16
2599; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2600; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2601; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2602; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2603; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2604; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2605; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
2606; GCN-HSA-NEXT:    s_sext_i32_i16 s16, s16
2607; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s19
2608; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s18
2609; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
2610; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2611; GCN-HSA-NEXT:    s_sext_i32_i16 s17, s17
2612; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
2613; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
2614; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
2615; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
2616; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2617; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s34
2618; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2619; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s17
2620; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s33
2621; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2622; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2623; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
2624; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2625; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
2626; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2627; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
2628; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
2629; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
2630; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
2631; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
2632; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2633; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2634; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2635; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
2636; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
2637; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2638; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2639; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2640; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
2641; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
2642; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
2643; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2644; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2645; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2646; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
2647; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
2648; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2649; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2650; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2651; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
2652; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2653; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
2654; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2655; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2656; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2657; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
2658; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
2659; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2660; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2661; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2662; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
2663; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2664; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
2665; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2666; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
2667; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
2668; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2669; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2670; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2671; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
2672; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2673; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
2674; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2675; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
2676; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
2677; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2678; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2679; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2680; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
2681; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2682; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
2683; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2684; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2685; GCN-HSA-NEXT:    s_endpgm
2686;
2687; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32:
2688; GCN-NOHSA-VI:       ; %bb.0:
2689; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
2690; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2691; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2692; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
2693; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
2694; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2695; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s15, 16
2696; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s36, s14, 16
2697; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
2698; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
2699; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s13, 16
2700; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s12, 16
2701; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
2702; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
2703; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2704; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s36
2705; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2706; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
2707; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s30, s11, 16
2708; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s31, s10, 16
2709; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
2710; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
2711; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
2712; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s28, s9, 16
2713; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2714; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
2715; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2716; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
2717; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s29, s8, 16
2718; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
2719; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2720; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
2721; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s26, s7, 16
2722; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2723; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
2724; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2725; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
2726; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s27, s6, 16
2727; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2728; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2729; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
2730; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s24, s5, 16
2731; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2732; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
2733; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2734; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
2735; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s25, s4, 16
2736; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2737; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2738; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
2739; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s22, s3, 16
2740; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2741; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
2742; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2743; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
2744; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s23, s2, 16
2745; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s3, s3
2746; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
2747; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
2748; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s20, s1, 16
2749; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2750; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
2751; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2752; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
2753; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s21, s0, 16
2754; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s1, s1
2755; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s0, s0
2756; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
2757; GCN-NOHSA-VI-NEXT:    s_nop 0
2758; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
2759; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
2760; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2761; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
2762; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
2763; GCN-NOHSA-VI-NEXT:    s_nop 0
2764; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
2765; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
2766; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
2767; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
2768; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
2769; GCN-NOHSA-VI-NEXT:    s_endpgm
2770;
2771; EG-LABEL: constant_sextload_v32i16_to_v32i32:
2772; EG:       ; %bb.0:
2773; EG-NEXT:    ALU 8, @20, KC0[CB0:0-32], KC1[]
2774; EG-NEXT:    TEX 3 @12
2775; EG-NEXT:    ALU 73, @29, KC0[CB0:0-32], KC1[]
2776; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0
2777; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0
2778; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0
2779; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2780; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0
2781; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2782; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
2783; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
2784; EG-NEXT:    CF_END
2785; EG-NEXT:    Fetch clause starting at 12:
2786; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
2787; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
2788; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
2789; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
2790; EG-NEXT:    ALU clause starting at 20:
2791; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
2792; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2793; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2794; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
2795; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2796; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
2797; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2798; EG-NEXT:     MOV * T22.X, KC0[2].Z,
2799; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2800; EG-NEXT:    ALU clause starting at 29:
2801; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2802; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2803; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2804; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2805; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
2806; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
2807; EG-NEXT:     LSHR T0.W, T22.W, literal.y,
2808; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2809; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2810; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
2811; EG-NEXT:     LSHR T28.X, PS, literal.x,
2812; EG-NEXT:     LSHR T0.Y, T22.Y, literal.y,
2813; EG-NEXT:     BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212
2814; EG-NEXT:     LSHR T1.W, T24.W, literal.y,
2815; EG-NEXT:     LSHR * T2.W, T24.Y, literal.y,
2816; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2817; EG-NEXT:     BFE_INT T29.X, T25.X, 0.0, literal.x,
2818; EG-NEXT:     LSHR T1.Y, T23.W, literal.x,
2819; EG-NEXT:     BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212
2820; EG-NEXT:     LSHR T3.W, T23.Y, literal.x,
2821; EG-NEXT:     LSHR * T4.W, T25.Y, literal.x,
2822; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2823; EG-NEXT:     BFE_INT T30.X, T25.Z, 0.0, literal.x,
2824; EG-NEXT:     LSHR T2.Y, T25.W, literal.x,
2825; EG-NEXT:     BFE_INT T31.Z, T23.Y, 0.0, literal.x,
2826; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
2827; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
2828; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2829; EG-NEXT:     BFE_INT T31.X, T23.X, 0.0, literal.x,
2830; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
2831; EG-NEXT:     BFE_INT T32.Z, T23.W, 0.0, literal.x,
2832; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
2833; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
2834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2835; EG-NEXT:     BFE_INT T32.X, T23.Z, 0.0, literal.x,
2836; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
2837; EG-NEXT:     BFE_INT T25.Z, T24.Y, 0.0, literal.x,
2838; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x,
2839; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
2840; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2841; EG-NEXT:     BFE_INT T25.X, T24.X, 0.0, literal.x,
2842; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
2843; EG-NEXT:     BFE_INT T33.Z, T24.W, 0.0, literal.x,
2844; EG-NEXT:     BFE_INT T32.W, T1.Y, 0.0, literal.x,
2845; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
2846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2847; EG-NEXT:     BFE_INT T33.X, T24.Z, 0.0, literal.x,
2848; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
2849; EG-NEXT:     BFE_INT T23.Z, T22.Y, 0.0, literal.x,
2850; EG-NEXT:     BFE_INT T25.W, T2.W, 0.0, literal.x,
2851; EG-NEXT:     LSHR * T2.W, T24.X, literal.x,
2852; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2853; EG-NEXT:     BFE_INT T23.X, T22.X, 0.0, literal.x,
2854; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
2855; EG-NEXT:     BFE_INT T34.Z, T22.W, 0.0, literal.x,
2856; EG-NEXT:     BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
2857; EG-NEXT:     LSHR * T1.W, T24.Z, literal.x,
2858; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2859; EG-NEXT:     BFE_INT T34.X, T22.Z, 0.0, literal.x,
2860; EG-NEXT:     BFE_INT T33.Y, PS, 0.0, literal.x,
2861; EG-NEXT:     LSHR T0.Z, T22.X, literal.x,
2862; EG-NEXT:     BFE_INT T23.W, T0.Y, 0.0, literal.x,
2863; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2864; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
2865; EG-NEXT:     LSHR T22.X, PS, literal.x,
2866; EG-NEXT:     BFE_INT T23.Y, PV.Z, 0.0, literal.y,
2867; EG-NEXT:     LSHR T0.Z, T22.Z, literal.y,
2868; EG-NEXT:     BFE_INT T34.W, T0.W, 0.0, literal.y,
2869; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2870; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2871; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2872; EG-NEXT:     LSHR T24.X, PS, literal.x,
2873; EG-NEXT:     BFE_INT * T34.Y, PV.Z, 0.0, literal.y,
2874; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2875  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2876  %ext = sext <32 x i16> %load to <32 x i32>
2877  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2878  ret void
2879}
2880
2881define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
2882; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
2883; GCN-NOHSA-SI:       ; %bb.0:
2884; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2885; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2886; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2887; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, 0xffff
2888; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[36:51], s[18:19], 0x10
2889; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2890; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s1, 16
2891; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
2892; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s3, 16
2893; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s2, 16
2894; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s5, 16
2895; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s4, 16
2896; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s7, 16
2897; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s6, 16
2898; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s9, 16
2899; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s8, 16
2900; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s11, 16
2901; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s10, 16
2902; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s13, 16
2903; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s12, 16
2904; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s15, 16
2905; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s35, s14, 16
2906; GCN-NOHSA-SI-NEXT:    s_and_b32 s52, s1, s20
2907; GCN-NOHSA-SI-NEXT:    s_and_b32 s53, s0, s20
2908; GCN-NOHSA-SI-NEXT:    s_and_b32 s54, s3, s20
2909; GCN-NOHSA-SI-NEXT:    s_and_b32 s55, s2, s20
2910; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s20
2911; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s20
2912; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s20
2913; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s20
2914; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s20
2915; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s20
2916; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s20
2917; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s20
2918; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, s20
2919; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, s20
2920; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, s20
2921; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, s20
2922; GCN-NOHSA-SI-NEXT:    s_and_b32 s56, s37, s20
2923; GCN-NOHSA-SI-NEXT:    s_and_b32 s57, s36, s20
2924; GCN-NOHSA-SI-NEXT:    s_and_b32 s58, s39, s20
2925; GCN-NOHSA-SI-NEXT:    s_and_b32 s59, s38, s20
2926; GCN-NOHSA-SI-NEXT:    s_and_b32 s60, s41, s20
2927; GCN-NOHSA-SI-NEXT:    s_and_b32 s61, s40, s20
2928; GCN-NOHSA-SI-NEXT:    s_and_b32 s62, s43, s20
2929; GCN-NOHSA-SI-NEXT:    s_and_b32 s63, s42, s20
2930; GCN-NOHSA-SI-NEXT:    s_and_b32 s64, s45, s20
2931; GCN-NOHSA-SI-NEXT:    s_and_b32 s65, s44, s20
2932; GCN-NOHSA-SI-NEXT:    s_and_b32 s66, s47, s20
2933; GCN-NOHSA-SI-NEXT:    s_and_b32 s67, s46, s20
2934; GCN-NOHSA-SI-NEXT:    s_and_b32 s68, s49, s20
2935; GCN-NOHSA-SI-NEXT:    s_and_b32 s69, s48, s20
2936; GCN-NOHSA-SI-NEXT:    s_and_b32 s70, s51, s20
2937; GCN-NOHSA-SI-NEXT:    s_and_b32 s20, s50, s20
2938; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s37, s37, 16
2939; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s36, s36, 16
2940; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s39, s39, 16
2941; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s38, 16
2942; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s41, s41, 16
2943; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s40, s40, 16
2944; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s42, 16
2945; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s45, s45, 16
2946; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s44, s44, 16
2947; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s47, s47, 16
2948; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s46, 16
2949; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s49, s49, 16
2950; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s48, 16
2951; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s51, s51, 16
2952; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s50, 16
2953; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s43, s43, 16
2954; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2955; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2956; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2957; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2958; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
2959; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
2960; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s70
2961; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
2962; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s69
2963; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s48
2964; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s68
2965; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s49
2966; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s67
2967; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s46
2968; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s66
2969; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s47
2970; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s65
2971; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s44
2972; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s64
2973; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s45
2974; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s63
2975; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s42
2976; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s62
2977; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s61
2978; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s43
2979; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s40
2980; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s60
2981; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s41
2982; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
2983; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
2984; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
2985; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
2986; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
2987; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
2988; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
2989; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s59
2990; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
2991; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s58
2992; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
2993; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
2994; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2995; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s57
2996; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
2997; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s56
2998; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
2999; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3000; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3001; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3002; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s35
3003; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3004; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s34
3005; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3006; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3007; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3008; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s33
3009; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3010; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s31
3011; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3012; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3013; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3014; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s30
3015; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3016; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s29
3017; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3018; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3019; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3020; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s28
3021; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3022; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s27
3023; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3024; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3025; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3026; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s26
3027; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3028; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
3029; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3030; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3031; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3032; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s24
3033; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3034; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s23
3035; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3036; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3037; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s55
3038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s22
3039; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s54
3040; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
3041; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3042; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3043; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s53
3044; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
3045; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s52
3046; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
3047; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3048; GCN-NOHSA-SI-NEXT:    s_endpgm
3049;
3050; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
3051; GCN-HSA:       ; %bb.0:
3052; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
3053; GCN-HSA-NEXT:    s_mov_b32 s53, 0xffff
3054; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3055; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3056; GCN-HSA-NEXT:    s_load_dwordx16 s[36:51], s[18:19], 0x10
3057; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3058; GCN-HSA-NEXT:    s_lshr_b32 s20, s1, 16
3059; GCN-HSA-NEXT:    s_lshr_b32 s21, s0, 16
3060; GCN-HSA-NEXT:    s_lshr_b32 s22, s3, 16
3061; GCN-HSA-NEXT:    s_lshr_b32 s23, s2, 16
3062; GCN-HSA-NEXT:    s_lshr_b32 s24, s5, 16
3063; GCN-HSA-NEXT:    s_lshr_b32 s25, s4, 16
3064; GCN-HSA-NEXT:    s_lshr_b32 s26, s7, 16
3065; GCN-HSA-NEXT:    s_lshr_b32 s27, s6, 16
3066; GCN-HSA-NEXT:    s_lshr_b32 s28, s9, 16
3067; GCN-HSA-NEXT:    s_lshr_b32 s29, s8, 16
3068; GCN-HSA-NEXT:    s_lshr_b32 s30, s11, 16
3069; GCN-HSA-NEXT:    s_lshr_b32 s31, s10, 16
3070; GCN-HSA-NEXT:    s_lshr_b32 s33, s13, 16
3071; GCN-HSA-NEXT:    s_lshr_b32 s34, s12, 16
3072; GCN-HSA-NEXT:    s_lshr_b32 s35, s15, 16
3073; GCN-HSA-NEXT:    s_lshr_b32 s52, s14, 16
3074; GCN-HSA-NEXT:    s_and_b32 s1, s1, s53
3075; GCN-HSA-NEXT:    s_and_b32 s0, s0, s53
3076; GCN-HSA-NEXT:    s_and_b32 s3, s3, s53
3077; GCN-HSA-NEXT:    s_and_b32 s2, s2, s53
3078; GCN-HSA-NEXT:    s_and_b32 s5, s5, s53
3079; GCN-HSA-NEXT:    s_and_b32 s4, s4, s53
3080; GCN-HSA-NEXT:    s_and_b32 s54, s7, s53
3081; GCN-HSA-NEXT:    s_and_b32 s55, s6, s53
3082; GCN-HSA-NEXT:    s_and_b32 s9, s9, s53
3083; GCN-HSA-NEXT:    s_and_b32 s8, s8, s53
3084; GCN-HSA-NEXT:    s_and_b32 s11, s11, s53
3085; GCN-HSA-NEXT:    s_and_b32 s10, s10, s53
3086; GCN-HSA-NEXT:    s_and_b32 s13, s13, s53
3087; GCN-HSA-NEXT:    s_and_b32 s12, s12, s53
3088; GCN-HSA-NEXT:    s_and_b32 s15, s15, s53
3089; GCN-HSA-NEXT:    s_and_b32 s14, s14, s53
3090; GCN-HSA-NEXT:    s_and_b32 s18, s37, s53
3091; GCN-HSA-NEXT:    s_and_b32 s19, s36, s53
3092; GCN-HSA-NEXT:    s_and_b32 s56, s39, s53
3093; GCN-HSA-NEXT:    s_and_b32 s57, s38, s53
3094; GCN-HSA-NEXT:    s_and_b32 s58, s41, s53
3095; GCN-HSA-NEXT:    s_and_b32 s59, s40, s53
3096; GCN-HSA-NEXT:    s_and_b32 s60, s43, s53
3097; GCN-HSA-NEXT:    s_and_b32 s61, s42, s53
3098; GCN-HSA-NEXT:    s_and_b32 s62, s45, s53
3099; GCN-HSA-NEXT:    s_and_b32 s63, s44, s53
3100; GCN-HSA-NEXT:    s_and_b32 s64, s47, s53
3101; GCN-HSA-NEXT:    s_and_b32 s65, s46, s53
3102; GCN-HSA-NEXT:    s_and_b32 s66, s49, s53
3103; GCN-HSA-NEXT:    s_and_b32 s67, s48, s53
3104; GCN-HSA-NEXT:    s_and_b32 s68, s51, s53
3105; GCN-HSA-NEXT:    s_and_b32 s53, s50, s53
3106; GCN-HSA-NEXT:    s_lshr_b32 s37, s37, 16
3107; GCN-HSA-NEXT:    s_lshr_b32 s36, s36, 16
3108; GCN-HSA-NEXT:    s_lshr_b32 s39, s39, 16
3109; GCN-HSA-NEXT:    s_lshr_b32 s38, s38, 16
3110; GCN-HSA-NEXT:    s_lshr_b32 s41, s41, 16
3111; GCN-HSA-NEXT:    s_lshr_b32 s40, s40, 16
3112; GCN-HSA-NEXT:    s_lshr_b32 s43, s43, 16
3113; GCN-HSA-NEXT:    s_lshr_b32 s42, s42, 16
3114; GCN-HSA-NEXT:    s_lshr_b32 s45, s45, 16
3115; GCN-HSA-NEXT:    s_lshr_b32 s44, s44, 16
3116; GCN-HSA-NEXT:    s_lshr_b32 s47, s47, 16
3117; GCN-HSA-NEXT:    s_lshr_b32 s46, s46, 16
3118; GCN-HSA-NEXT:    s_lshr_b32 s49, s49, 16
3119; GCN-HSA-NEXT:    s_lshr_b32 s48, s48, 16
3120; GCN-HSA-NEXT:    s_lshr_b32 s51, s51, 16
3121; GCN-HSA-NEXT:    s_lshr_b32 s50, s50, 16
3122; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xf0
3123; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3124; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s7
3125; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s6
3126; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xe0
3127; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3128; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s7
3129; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s6
3130; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xd0
3131; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3132; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s7
3133; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s6
3134; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xc0
3135; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3136; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s7
3137; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s6
3138; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xb0
3139; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3140; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s7
3141; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s6
3142; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xa0
3143; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3144; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s7
3145; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s6
3146; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x90
3147; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s67
3148; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s48
3149; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s66
3150; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s49
3151; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3152; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3153; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s7
3154; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s6
3155; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x80
3156; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3157; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s7
3158; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s6
3159; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x70
3160; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s61
3161; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s42
3162; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s60
3163; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s43
3164; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3165; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3166; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s53
3167; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s7
3168; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
3169; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x60
3170; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3171; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s50
3172; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s68
3173; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s51
3174; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s7
3175; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s65
3176; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s46
3177; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s64
3178; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s47
3179; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s63
3180; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s44
3181; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s62
3182; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s45
3183; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s59
3184; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3185; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s40
3186; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s57
3187; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s58
3188; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s41
3189; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s38
3190; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s56
3191; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s19
3192; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s39
3193; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s36
3194; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s6
3195; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x50
3196; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3197; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s18
3198; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s14
3199; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3200; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s37
3201; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s12
3202; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s52
3203; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s15
3204; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s35
3205; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s34
3206; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3207; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s13
3208; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s33
3209; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3210; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3211; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3212; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3213; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3214; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3215; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
3216; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
3217; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
3218; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
3219; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3220; GCN-HSA-NEXT:    s_add_u32 s6, s16, 64
3221; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3222; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3223; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3224; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
3225; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
3226; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
3227; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3228; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3229; GCN-HSA-NEXT:    s_add_u32 s6, s16, 48
3230; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3231; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3232; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3233; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s55
3234; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
3235; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s54
3236; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
3237; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3238; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3239; GCN-HSA-NEXT:    s_nop 0
3240; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
3241; GCN-HSA-NEXT:    s_add_u32 s4, s16, 32
3242; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
3243; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
3244; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3245; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3246; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3247; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3248; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3249; GCN-HSA-NEXT:    s_nop 0
3250; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3251; GCN-HSA-NEXT:    s_add_u32 s2, s16, 16
3252; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
3253; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
3254; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3255; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
3256; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
3257; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3258; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3259; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
3260; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
3261; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3262; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
3263; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3264; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
3265; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3266; GCN-HSA-NEXT:    s_endpgm
3267;
3268; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32:
3269; GCN-NOHSA-VI:       ; %bb.0:
3270; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
3271; GCN-NOHSA-VI-NEXT:    s_mov_b32 s40, 0xffff
3272; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3273; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
3274; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x40
3275; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3276; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s17, 16
3277; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s17, s40
3278; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s16, 16
3279; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s16, s40
3280; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s19, 16
3281; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s19, s40
3282; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s38, s18, 16
3283; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s18, s40
3284; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s39, s21, 16
3285; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s21, s40
3286; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s41, s20, 16
3287; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s20, s40
3288; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s42, s23, 16
3289; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s23, s40
3290; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s43, s22, 16
3291; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s22, s40
3292; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s25, 16
3293; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s25, s40
3294; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s45, s24, 16
3295; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s24, s40
3296; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s27, 16
3297; GCN-NOHSA-VI-NEXT:    s_and_b32 s27, s27, s40
3298; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s47, s26, 16
3299; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s26, s40
3300; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s48, s29, 16
3301; GCN-NOHSA-VI-NEXT:    s_and_b32 s29, s29, s40
3302; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s49, s28, 16
3303; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s28, s40
3304; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s50, s31, 16
3305; GCN-NOHSA-VI-NEXT:    s_and_b32 s31, s31, s40
3306; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s51, s30, 16
3307; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s30, s40
3308; GCN-NOHSA-VI-NEXT:    s_and_b32 s53, s1, s40
3309; GCN-NOHSA-VI-NEXT:    s_and_b32 s55, s0, s40
3310; GCN-NOHSA-VI-NEXT:    s_and_b32 s57, s3, s40
3311; GCN-NOHSA-VI-NEXT:    s_and_b32 s59, s2, s40
3312; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s60, s5, 16
3313; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, s40
3314; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s61, s4, 16
3315; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, s40
3316; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s7, 16
3317; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, s40
3318; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s63, s6, 16
3319; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, s40
3320; GCN-NOHSA-VI-NEXT:    s_and_b32 s64, s9, s40
3321; GCN-NOHSA-VI-NEXT:    s_and_b32 s65, s8, s40
3322; GCN-NOHSA-VI-NEXT:    s_and_b32 s66, s11, s40
3323; GCN-NOHSA-VI-NEXT:    s_and_b32 s67, s10, s40
3324; GCN-NOHSA-VI-NEXT:    s_and_b32 s68, s13, s40
3325; GCN-NOHSA-VI-NEXT:    s_and_b32 s69, s12, s40
3326; GCN-NOHSA-VI-NEXT:    s_and_b32 s70, s15, s40
3327; GCN-NOHSA-VI-NEXT:    s_and_b32 s40, s14, s40
3328; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s15, 16
3329; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s14, 16
3330; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s1, 16
3331; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s0, 16
3332; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s3, 16
3333; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s2, 16
3334; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s13, 16
3335; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s12, 16
3336; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3337; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3338; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s36
3339; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s37
3340; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s40
3341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s14
3342; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s70
3343; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
3344; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
3345; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
3346; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3347; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
3348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s69
3349; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s12
3350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s68
3351; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
3352; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
3353; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3354; GCN-NOHSA-VI-NEXT:    s_nop 0
3355; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s67
3356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s10
3357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s66
3358; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
3359; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
3360; GCN-NOHSA-VI-NEXT:    s_nop 0
3361; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s65
3362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s8
3363; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s64
3364; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
3365; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
3366; GCN-NOHSA-VI-NEXT:    s_nop 0
3367; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
3368; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s63
3369; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
3370; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s62
3371; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
3372; GCN-NOHSA-VI-NEXT:    s_nop 0
3373; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
3374; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s61
3375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
3376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s60
3377; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3378; GCN-NOHSA-VI-NEXT:    s_nop 0
3379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s59
3380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s58
3381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s57
3382; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s56
3383; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3384; GCN-NOHSA-VI-NEXT:    s_nop 0
3385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s55
3386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s54
3387; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s53
3388; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s52
3389; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3390; GCN-NOHSA-VI-NEXT:    s_nop 0
3391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
3392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s51
3393; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
3394; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s50
3395; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3396; GCN-NOHSA-VI-NEXT:    s_nop 0
3397; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
3398; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
3399; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
3400; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s48
3401; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3402; GCN-NOHSA-VI-NEXT:    s_nop 0
3403; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
3404; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s47
3405; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
3406; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s46
3407; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3408; GCN-NOHSA-VI-NEXT:    s_nop 0
3409; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
3410; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s45
3411; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
3412; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s44
3413; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3414; GCN-NOHSA-VI-NEXT:    s_nop 0
3415; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
3416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s43
3417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
3418; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s42
3419; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3420; GCN-NOHSA-VI-NEXT:    s_nop 0
3421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
3422; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s41
3423; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
3424; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
3425; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3426; GCN-NOHSA-VI-NEXT:    s_nop 0
3427; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
3428; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s38
3429; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
3430; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
3431; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3432; GCN-NOHSA-VI-NEXT:    s_nop 0
3433; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
3434; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
3435; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
3436; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
3437; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3438; GCN-NOHSA-VI-NEXT:    s_endpgm
3439;
3440; EG-LABEL: constant_zextload_v64i16_to_v64i32:
3441; EG:       ; %bb.0:
3442; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
3443; EG-NEXT:    TEX 3 @22
3444; EG-NEXT:    ALU 55, @39, KC0[CB0:0-32], KC1[]
3445; EG-NEXT:    TEX 3 @30
3446; EG-NEXT:    ALU 87, @95, KC0[CB0:0-32], KC1[]
3447; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
3448; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
3449; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
3450; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0
3451; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
3452; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0
3453; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
3454; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
3455; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
3456; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 0
3457; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
3458; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0
3459; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
3460; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0
3461; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T43.X, 0
3462; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T36.X, 1
3463; EG-NEXT:    CF_END
3464; EG-NEXT:    Fetch clause starting at 22:
3465; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
3466; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 48, #1
3467; EG-NEXT:     VTX_READ_128 T40.XYZW, T35.X, 32, #1
3468; EG-NEXT:     VTX_READ_128 T41.XYZW, T35.X, 16, #1
3469; EG-NEXT:    Fetch clause starting at 30:
3470; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
3471; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
3472; EG-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 80, #1
3473; EG-NEXT:     VTX_READ_128 T52.XYZW, T35.X, 64, #1
3474; EG-NEXT:    ALU clause starting at 38:
3475; EG-NEXT:     MOV * T35.X, KC0[2].Z,
3476; EG-NEXT:    ALU clause starting at 39:
3477; EG-NEXT:     LSHR * T37.W, T36.Y, literal.x,
3478; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3479; EG-NEXT:     AND_INT * T37.Z, T36.Y, literal.x,
3480; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3481; EG-NEXT:     LSHR T37.Y, T36.X, literal.x,
3482; EG-NEXT:     LSHR * T38.W, T36.W, literal.x,
3483; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3484; EG-NEXT:     AND_INT T37.X, T36.X, literal.x,
3485; EG-NEXT:     AND_INT T38.Z, T36.W, literal.x,
3486; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.y,
3487; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
3488; EG-NEXT:     LSHR T38.Y, T36.Z, literal.x,
3489; EG-NEXT:     LSHR * T42.W, T41.Y, literal.x,
3490; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3491; EG-NEXT:     AND_INT T38.X, T36.Z, literal.x,
3492; EG-NEXT:     AND_INT T42.Z, T41.Y, literal.x,
3493; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3494; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3495; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
3496; EG-NEXT:     LSHR T42.Y, T41.X, literal.y,
3497; EG-NEXT:     LSHR T44.W, T41.W, literal.y,
3498; EG-NEXT:     AND_INT * T42.X, T41.X, literal.z,
3499; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3500; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3501; EG-NEXT:     AND_INT T44.Z, T41.W, literal.x,
3502; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3503; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
3504; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
3505; EG-NEXT:     LSHR T44.Y, T41.Z, literal.y,
3506; EG-NEXT:     LSHR T45.W, T40.Y, literal.y,
3507; EG-NEXT:     AND_INT * T44.X, T41.Z, literal.z,
3508; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3509; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3510; EG-NEXT:     AND_INT T45.Z, T40.Y, literal.x,
3511; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3512; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
3513; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
3514; EG-NEXT:     LSHR T45.Y, T40.X, literal.y,
3515; EG-NEXT:     LSHR T47.W, T40.W, literal.y,
3516; EG-NEXT:     AND_INT * T45.X, T40.X, literal.z,
3517; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3518; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3519; EG-NEXT:     AND_INT T47.Z, T40.W, literal.x,
3520; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3521; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
3522; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
3523; EG-NEXT:     LSHR T47.Y, T40.Z, literal.y,
3524; EG-NEXT:     AND_INT * T47.X, T40.Z, literal.z,
3525; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3526; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3527; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
3528; EG-NEXT:     LSHR * T35.W, T39.Y, literal.y,
3529; EG-NEXT:    80(1.121039e-43), 16(2.242078e-44)
3530; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
3531; EG-NEXT:     AND_INT * T35.Z, T39.Y, literal.y,
3532; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
3533; EG-NEXT:    ALU clause starting at 95:
3534; EG-NEXT:     LSHR T35.Y, T39.X, literal.x,
3535; EG-NEXT:     LSHR * T53.W, T39.W, literal.x,
3536; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3537; EG-NEXT:     AND_INT T35.X, T39.X, literal.x,
3538; EG-NEXT:     AND_INT T53.Z, T39.W, literal.x,
3539; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3540; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
3541; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
3542; EG-NEXT:     LSHR T53.Y, T39.Z, literal.y,
3543; EG-NEXT:     LSHR T54.W, T52.Y, literal.y,
3544; EG-NEXT:     AND_INT * T53.X, T39.Z, literal.z,
3545; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3546; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3547; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.x,
3548; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3549; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
3550; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
3551; EG-NEXT:     LSHR T54.Y, T52.X, literal.y,
3552; EG-NEXT:     LSHR T56.W, T52.W, literal.y,
3553; EG-NEXT:     AND_INT * T54.X, T52.X, literal.z,
3554; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3555; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3556; EG-NEXT:     AND_INT T56.Z, T52.W, literal.x,
3557; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3558; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
3559; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
3560; EG-NEXT:     LSHR T56.Y, T52.Z, literal.y,
3561; EG-NEXT:     LSHR T57.W, T51.Y, literal.y,
3562; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.z,
3563; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3564; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3565; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.x,
3566; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3567; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
3568; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
3569; EG-NEXT:     LSHR T57.Y, T51.X, literal.y,
3570; EG-NEXT:     LSHR T59.W, T51.W, literal.y,
3571; EG-NEXT:     AND_INT * T57.X, T51.X, literal.z,
3572; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3573; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3574; EG-NEXT:     AND_INT T59.Z, T51.W, literal.x,
3575; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3576; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
3577; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
3578; EG-NEXT:     LSHR T59.Y, T51.Z, literal.y,
3579; EG-NEXT:     LSHR T60.W, T50.Y, literal.y,
3580; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.z,
3581; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3582; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3583; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.x,
3584; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3585; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
3586; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
3587; EG-NEXT:     LSHR T60.Y, T50.X, literal.y,
3588; EG-NEXT:     LSHR T62.W, T50.W, literal.y,
3589; EG-NEXT:     AND_INT * T60.X, T50.X, literal.z,
3590; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3591; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3592; EG-NEXT:     AND_INT T62.Z, T50.W, literal.x,
3593; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3594; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
3595; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
3596; EG-NEXT:     LSHR T62.Y, T50.Z, literal.y,
3597; EG-NEXT:     LSHR T63.W, T49.Y, literal.y,
3598; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.z,
3599; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3600; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3601; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.x,
3602; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3603; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
3604; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
3605; EG-NEXT:     LSHR T63.Y, T49.X, literal.y,
3606; EG-NEXT:     LSHR T65.W, T49.W, literal.y,
3607; EG-NEXT:     AND_INT * T63.X, T49.X, literal.z,
3608; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3609; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3610; EG-NEXT:     AND_INT T65.Z, T49.W, literal.x,
3611; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3612; EG-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
3613; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
3614; EG-NEXT:     LSHR T65.Y, T49.Z, literal.y,
3615; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.z,
3616; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3617; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3618; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3619; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
3620; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
3621; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3622  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
3623  %ext = zext <64 x i16> %load to <64 x i32>
3624  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
3625  ret void
3626}
3627
3628define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
3629; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
3630; GCN-NOHSA-SI:       ; %bb.0:
3631; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x9
3632; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3633; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x0
3634; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x10
3635; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3636; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s1, 16
3637; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s0, 16
3638; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s35, s1
3639; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s38, s0
3640; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s39, s3, 16
3641; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s40, s2, 16
3642; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s41, s3
3643; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s42, s2
3644; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s43, s5, 16
3645; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s44, s4, 16
3646; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
3647; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
3648; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s45, s7, 16
3649; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s46, s6, 16
3650; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
3651; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
3652; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s47, s9, 16
3653; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s48, s8, 16
3654; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
3655; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
3656; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s49, s11, 16
3657; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s50, s10, 16
3658; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
3659; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
3660; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s51, s13, 16
3661; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s52, s12, 16
3662; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
3663; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
3664; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s53, s15, 16
3665; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s54, s14, 16
3666; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
3667; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
3668; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s55, s17, 16
3669; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s56, s16, 16
3670; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s17, s17
3671; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s16, s16
3672; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s57, s19, 16
3673; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s58, s18, 16
3674; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s19, s19
3675; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s18, s18
3676; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s59, s21, 16
3677; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s60, s20, 16
3678; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s21
3679; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s20
3680; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s61, s22, 16
3681; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s62, s23
3682; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s22, s22
3683; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s25, 16
3684; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s64, s24, 16
3685; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s25
3686; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s24
3687; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s65, s27, 16
3688; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s66, s26, 16
3689; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s27, s27
3690; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s26, s26
3691; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s67, s29, 16
3692; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s68, s28, 16
3693; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s29, s29
3694; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s28, s28
3695; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s69, s31, 16
3696; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s70, s30, 16
3697; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s31, s31
3698; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s30, s30
3699; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s23, 16
3700; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
3701; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
3702; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3703; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
3705; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
3706; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
3707; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
3708; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s28
3709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
3710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s29
3711; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
3712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s26
3713; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
3714; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s27
3715; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
3716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s24
3717; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
3718; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s25
3719; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
3720; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s22
3721; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s61
3722; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s62
3723; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s20
3724; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s23
3725; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s60
3726; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s21
3727; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s59
3728; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3729; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3730; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3731; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3732; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3733; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
3734; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
3735; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
3736; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s58
3737; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
3738; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s57
3739; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3740; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
3742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
3743; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
3744; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
3745; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3746; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s54
3749; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3750; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s53
3751; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3752; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
3755; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3756; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
3757; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3758; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3759; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3760; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
3761; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3762; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
3763; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3764; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3766; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
3767; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3768; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
3769; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3770; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3772; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
3773; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3774; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
3775; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3776; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3777; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3778; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s44
3779; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3780; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s43
3781; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3782; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3783; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s42
3784; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s40
3785; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s41
3786; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
3787; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3788; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3789; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
3790; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
3791; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
3792; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
3793; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3794; GCN-NOHSA-SI-NEXT:    s_endpgm
3795;
3796; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
3797; GCN-HSA:       ; %bb.0:
3798; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3799; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3800; GCN-HSA-NEXT:    s_load_dwordx16 s[16:31], s[2:3], 0x0
3801; GCN-HSA-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x10
3802; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3803; GCN-HSA-NEXT:    s_ashr_i32 s4, s17, 16
3804; GCN-HSA-NEXT:    s_ashr_i32 s5, s16, 16
3805; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s17
3806; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s16
3807; GCN-HSA-NEXT:    s_ashr_i32 s8, s19, 16
3808; GCN-HSA-NEXT:    s_ashr_i32 s9, s18, 16
3809; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s19
3810; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s18
3811; GCN-HSA-NEXT:    s_ashr_i32 s12, s21, 16
3812; GCN-HSA-NEXT:    s_ashr_i32 s13, s20, 16
3813; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s21
3814; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s20
3815; GCN-HSA-NEXT:    s_ashr_i32 s16, s23, 16
3816; GCN-HSA-NEXT:    s_ashr_i32 s17, s22, 16
3817; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s23
3818; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s22
3819; GCN-HSA-NEXT:    s_ashr_i32 s20, s25, 16
3820; GCN-HSA-NEXT:    s_ashr_i32 s21, s24, 16
3821; GCN-HSA-NEXT:    s_sext_i32_i16 s22, s25
3822; GCN-HSA-NEXT:    s_sext_i32_i16 s23, s24
3823; GCN-HSA-NEXT:    s_ashr_i32 s24, s27, 16
3824; GCN-HSA-NEXT:    s_ashr_i32 s25, s26, 16
3825; GCN-HSA-NEXT:    s_ashr_i32 s33, s29, 16
3826; GCN-HSA-NEXT:    s_ashr_i32 s34, s28, 16
3827; GCN-HSA-NEXT:    s_ashr_i32 s35, s31, 16
3828; GCN-HSA-NEXT:    s_ashr_i32 s52, s30, 16
3829; GCN-HSA-NEXT:    s_ashr_i32 s53, s37, 16
3830; GCN-HSA-NEXT:    s_ashr_i32 s54, s36, 16
3831; GCN-HSA-NEXT:    s_ashr_i32 s55, s39, 16
3832; GCN-HSA-NEXT:    s_ashr_i32 s56, s38, 16
3833; GCN-HSA-NEXT:    s_ashr_i32 s57, s41, 16
3834; GCN-HSA-NEXT:    s_ashr_i32 s58, s40, 16
3835; GCN-HSA-NEXT:    s_ashr_i32 s59, s43, 16
3836; GCN-HSA-NEXT:    s_ashr_i32 s60, s42, 16
3837; GCN-HSA-NEXT:    s_ashr_i32 s61, s45, 16
3838; GCN-HSA-NEXT:    s_ashr_i32 s62, s44, 16
3839; GCN-HSA-NEXT:    s_ashr_i32 s63, s47, 16
3840; GCN-HSA-NEXT:    s_ashr_i32 s64, s46, 16
3841; GCN-HSA-NEXT:    s_ashr_i32 s65, s49, 16
3842; GCN-HSA-NEXT:    s_ashr_i32 s66, s48, 16
3843; GCN-HSA-NEXT:    s_ashr_i32 s67, s51, 16
3844; GCN-HSA-NEXT:    s_ashr_i32 s68, s50, 16
3845; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
3846; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3847; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
3848; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
3849; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3850; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3851; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3852; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3853; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
3854; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3855; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s3
3856; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s2
3857; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
3858; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3859; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s3
3860; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s2
3861; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
3862; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3863; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s3
3864; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s2
3865; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
3866; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3867; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
3868; GCN-HSA-NEXT:    s_sext_i32_i16 s49, s49
3869; GCN-HSA-NEXT:    s_sext_i32_i16 s48, s48
3870; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
3871; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3872; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s48
3873; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s66
3874; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s49
3875; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s65
3876; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3877; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3878; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3879; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3880; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3881; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3882; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s3
3883; GCN-HSA-NEXT:    s_sext_i32_i16 s43, s43
3884; GCN-HSA-NEXT:    s_sext_i32_i16 s42, s42
3885; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s2
3886; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3887; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s42
3888; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s60
3889; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s43
3890; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s59
3891; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3892; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3893; GCN-HSA-NEXT:    s_sext_i32_i16 s51, s51
3894; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3895; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3896; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3897; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3898; GCN-HSA-NEXT:    s_sext_i32_i16 s50, s50
3899; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3900; GCN-HSA-NEXT:    s_sext_i32_i16 s36, s36
3901; GCN-HSA-NEXT:    s_sext_i32_i16 s39, s39
3902; GCN-HSA-NEXT:    s_sext_i32_i16 s38, s38
3903; GCN-HSA-NEXT:    s_sext_i32_i16 s41, s41
3904; GCN-HSA-NEXT:    s_sext_i32_i16 s40, s40
3905; GCN-HSA-NEXT:    s_sext_i32_i16 s45, s45
3906; GCN-HSA-NEXT:    s_sext_i32_i16 s44, s44
3907; GCN-HSA-NEXT:    s_sext_i32_i16 s47, s47
3908; GCN-HSA-NEXT:    s_sext_i32_i16 s46, s46
3909; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s50
3910; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s68
3911; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s51
3912; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s67
3913; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3914; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3915; GCN-HSA-NEXT:    s_sext_i32_i16 s29, s29
3916; GCN-HSA-NEXT:    s_sext_i32_i16 s28, s28
3917; GCN-HSA-NEXT:    s_sext_i32_i16 s31, s31
3918; GCN-HSA-NEXT:    s_sext_i32_i16 s30, s30
3919; GCN-HSA-NEXT:    s_sext_i32_i16 s37, s37
3920; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s46
3921; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s64
3922; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s47
3923; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s63
3924; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s44
3925; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s62
3926; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s45
3927; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s61
3928; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s40
3929; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3930; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s58
3931; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s38
3932; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s41
3933; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
3934; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s56
3935; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s39
3936; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s36
3937; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s55
3938; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s54
3939; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3940; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3941; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s37
3942; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s30
3943; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3944; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s53
3945; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s28
3946; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s52
3947; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s31
3948; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s35
3949; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s34
3950; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3951; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s29
3952; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s33
3953; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3954; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3955; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3956; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3957; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3958; GCN-HSA-NEXT:    s_sext_i32_i16 s27, s27
3959; GCN-HSA-NEXT:    s_sext_i32_i16 s26, s26
3960; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3961; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3962; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
3963; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3964; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
3965; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3966; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3967; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3968; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3969; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3970; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
3971; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
3972; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3973; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
3974; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3975; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3976; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3977; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3978; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3979; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3980; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s19
3981; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
3982; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
3983; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
3984; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3985; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3986; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3987; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3988; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3989; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s15
3990; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
3991; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
3992; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
3993; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3994; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3995; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3996; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
3997; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
3998; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
3999; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
4000; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4001; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4002; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4003; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
4004; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4005; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
4006; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s4
4007; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4008; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4009; GCN-HSA-NEXT:    s_endpgm
4010;
4011; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32:
4012; GCN-NOHSA-VI:       ; %bb.0:
4013; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
4014; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4015; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
4016; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x40
4017; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4018; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s49, s31, 16
4019; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s69, s15, 16
4020; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s70, s14, 16
4021; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
4022; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
4023; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s51, s1, 16
4024; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s52, s0, 16
4025; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s53, s1
4026; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s54, s0
4027; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s55, s3, 16
4028; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s56, s2, 16
4029; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s57, s3
4030; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s58, s2
4031; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s67, s13, 16
4032; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s68, s12, 16
4033; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
4034; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
4035; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4036; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4037; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s36
4038; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s37
4039; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
4040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s70
4041; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
4042; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s69
4043; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s65, s11, 16
4044; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s66, s10, 16
4045; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
4046; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
4047; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
4048; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s63, s9, 16
4049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
4050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s68
4051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
4052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s67
4053; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s64, s8, 16
4054; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
4055; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
4056; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4057; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s61, s7, 16
4058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
4059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
4060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
4061; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
4062; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s62, s6, 16
4063; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
4064; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
4065; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
4066; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s59, s5, 16
4067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
4068; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
4069; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
4070; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
4071; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s60, s4, 16
4072; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
4073; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
4074; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
4075; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s50, s30, 16
4076; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4077; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s62
4078; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
4079; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s61
4080; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
4081; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s31, s31
4082; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
4083; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s60
4084; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
4085; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
4086; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
4087; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s30, s30
4088; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s58
4089; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
4090; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s57
4091; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
4092; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
4093; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s47, s29, 16
4094; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
4095; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
4096; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s53
4097; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
4098; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s48, s28, 16
4099; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s29, s29
4100; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s28, s28
4101; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
4102; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s45, s27, 16
4103; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
4104; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s50
4105; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
4106; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
4107; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s46, s26, 16
4108; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s27, s27
4109; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s26, s26
4110; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4111; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s43, s25, 16
4112; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
4113; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s48
4114; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
4115; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
4116; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s44, s24, 16
4117; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s25, s25
4118; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s24, s24
4119; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
4120; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s41, s23, 16
4121; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
4122; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
4123; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
4124; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
4125; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s42, s22, 16
4126; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s23, s23
4127; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s22, s22
4128; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4129; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s39, s21, 16
4130; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
4131; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
4132; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
4133; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
4134; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s40, s20, 16
4135; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s21, s21
4136; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s20, s20
4137; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
4138; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s19, 16
4139; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
4140; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
4141; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
4142; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
4143; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s38, s18, 16
4144; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s19, s19
4145; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s18, s18
4146; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
4147; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s17, 16
4148; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
4149; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
4150; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
4151; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
4152; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s16, 16
4153; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s17, s17
4154; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s16, s16
4155; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
4156; GCN-NOHSA-VI-NEXT:    s_nop 0
4157; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
4158; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s38
4159; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
4160; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
4161; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4162; GCN-NOHSA-VI-NEXT:    s_nop 0
4163; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
4164; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
4165; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
4166; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
4167; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4168; GCN-NOHSA-VI-NEXT:    s_endpgm
4169;
4170; EG-LABEL: constant_sextload_v64i16_to_v64i32:
4171; EG:       ; %bb.0:
4172; EG-NEXT:    ALU 17, @38, KC0[CB0:0-32], KC1[]
4173; EG-NEXT:    TEX 7 @22
4174; EG-NEXT:    ALU 75, @56, KC0[CB0:0-32], KC1[]
4175; EG-NEXT:    ALU 71, @132, KC0[CB0:0-32], KC1[]
4176; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0
4177; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0
4178; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0
4179; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0
4180; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0
4181; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0
4182; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0
4183; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0
4184; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0
4185; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0
4186; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0
4187; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0
4188; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0
4189; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4190; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4191; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4192; EG-NEXT:    CF_END
4193; EG-NEXT:    PAD
4194; EG-NEXT:    Fetch clause starting at 22:
4195; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
4196; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
4197; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
4198; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
4199; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
4200; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
4201; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
4202; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
4203; EG-NEXT:    ALU clause starting at 38:
4204; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
4205; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4206; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4207; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
4208; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4209; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4210; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
4211; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4212; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
4213; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
4214; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4215; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4216; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4217; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4218; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4219; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4220; EG-NEXT:     MOV * T41.X, KC0[2].Z,
4221; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4222; EG-NEXT:    ALU clause starting at 56:
4223; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4224; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
4225; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4226; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4227; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
4228; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4229; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4230; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
4231; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4232; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4233; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
4234; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4235; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4236; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
4237; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4238; EG-NEXT:     LSHR T0.Y, T41.W, literal.y,
4239; EG-NEXT:     LSHR T0.Z, T41.Y, literal.y,
4240; EG-NEXT:     LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212
4241; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4242; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4243; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
4244; EG-NEXT:     LSHR T54.X, PS, literal.x,
4245; EG-NEXT:     LSHR T1.Y, T48.Y, literal.y,
4246; EG-NEXT:     LSHR T1.Z, T47.W, literal.y,
4247; EG-NEXT:     LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212
4248; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
4249; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4250; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4251; EG-NEXT:     LSHR T55.X, PS, literal.x,
4252; EG-NEXT:     LSHR T2.Y, T46.W, literal.y,
4253; EG-NEXT:     LSHR T2.Z, T46.Y, literal.y,
4254; EG-NEXT:     LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212
4255; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
4256; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4257; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
4258; EG-NEXT:     LSHR T56.X, PS, literal.x,
4259; EG-NEXT:     LSHR T3.Y, T45.Y, literal.y,
4260; EG-NEXT:     BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212
4261; EG-NEXT:     LSHR T3.W, T43.W, literal.y,
4262; EG-NEXT:     LSHR * T4.W, T43.Y, literal.y,
4263; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4264; EG-NEXT:     BFE_INT T57.X, T44.X, 0.0, literal.x,
4265; EG-NEXT:     LSHR T4.Y, T42.W, literal.x,
4266; EG-NEXT:     BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212
4267; EG-NEXT:     LSHR T5.W, T42.Y, literal.x,
4268; EG-NEXT:     LSHR * T6.W, T44.Y, literal.x,
4269; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4270; EG-NEXT:     BFE_INT T58.X, T44.Z, 0.0, literal.x,
4271; EG-NEXT:     LSHR T5.Y, T44.W, literal.x,
4272; EG-NEXT:     BFE_INT T59.Z, T42.Y, 0.0, literal.x,
4273; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
4274; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
4275; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4276; EG-NEXT:     BFE_INT T59.X, T42.X, 0.0, literal.x,
4277; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
4278; EG-NEXT:     BFE_INT T60.Z, T42.W, 0.0, literal.x,
4279; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
4280; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
4281; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4282; EG-NEXT:     BFE_INT T60.X, T42.Z, 0.0, literal.x,
4283; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
4284; EG-NEXT:     BFE_INT T44.Z, T43.Y, 0.0, literal.x,
4285; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x,
4286; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
4287; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4288; EG-NEXT:     BFE_INT T44.X, T43.X, 0.0, literal.x,
4289; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
4290; EG-NEXT:     BFE_INT T61.Z, T43.W, 0.0, literal.x,
4291; EG-NEXT:     BFE_INT T60.W, T4.Y, 0.0, literal.x,
4292; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
4293; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4294; EG-NEXT:     BFE_INT T61.X, T43.Z, 0.0, literal.x,
4295; EG-NEXT:     BFE_INT T60.Y, PS, 0.0, literal.x,
4296; EG-NEXT:     BFE_INT T42.Z, T45.Y, 0.0, literal.x,
4297; EG-NEXT:     BFE_INT * T44.W, T4.W, 0.0, literal.x,
4298; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4299; EG-NEXT:    ALU clause starting at 132:
4300; EG-NEXT:     LSHR * T4.W, T43.X, literal.x,
4301; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4302; EG-NEXT:     BFE_INT T42.X, T45.X, 0.0, literal.x,
4303; EG-NEXT:     BFE_INT T44.Y, PV.W, 0.0, literal.x,
4304; EG-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
4305; EG-NEXT:     BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
4306; EG-NEXT:     LSHR * T3.W, T43.Z, literal.x,
4307; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4308; EG-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
4309; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
4310; EG-NEXT:     BFE_INT T43.Z, T46.Y, 0.0, literal.x,
4311; EG-NEXT:     BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4312; EG-NEXT:     LSHR * T3.W, T45.X, literal.x,
4313; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4314; EG-NEXT:     BFE_INT T43.X, T46.X, 0.0, literal.x,
4315; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4316; EG-NEXT:     BFE_INT T63.Z, T46.W, 0.0, literal.x,
4317; EG-NEXT:     BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
4318; EG-NEXT:     LSHR * T2.W, T45.Z, literal.x,
4319; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4320; EG-NEXT:     BFE_INT T63.X, T46.Z, 0.0, literal.x,
4321; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
4322; EG-NEXT:     BFE_INT T45.Z, T47.Y, 0.0, literal.x,
4323; EG-NEXT:     BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4324; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
4325; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4326; EG-NEXT:     BFE_INT T45.X, T47.X, 0.0, literal.x,
4327; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4328; EG-NEXT:     BFE_INT T64.Z, T47.W, 0.0, literal.x,
4329; EG-NEXT:     BFE_INT T63.W, T2.Y, 0.0, literal.x,
4330; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
4331; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4332; EG-NEXT:     BFE_INT T64.X, T47.Z, 0.0, literal.x,
4333; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
4334; EG-NEXT:     BFE_INT T46.Z, T48.Y, 0.0, literal.x,
4335; EG-NEXT:     BFE_INT T45.W, T1.W, 0.0, literal.x,
4336; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
4337; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4338; EG-NEXT:     BFE_INT T46.X, T48.X, 0.0, literal.x,
4339; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4340; EG-NEXT:     BFE_INT T65.Z, T48.W, 0.0, literal.x,
4341; EG-NEXT:     BFE_INT T64.W, T1.Z, 0.0, literal.x,
4342; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
4343; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4344; EG-NEXT:     BFE_INT T65.X, T48.Z, 0.0, literal.x,
4345; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
4346; EG-NEXT:     BFE_INT T47.Z, T41.Y, 0.0, literal.x,
4347; EG-NEXT:     BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4348; EG-NEXT:     LSHR * T1.W, T48.X, literal.x,
4349; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4350; EG-NEXT:     BFE_INT T47.X, T41.X, 0.0, literal.x,
4351; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4352; EG-NEXT:     BFE_INT T66.Z, T41.W, 0.0, literal.x,
4353; EG-NEXT:     BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
4354; EG-NEXT:     LSHR * T0.W, T48.Z, literal.x,
4355; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4356; EG-NEXT:     BFE_INT T66.X, T41.Z, 0.0, literal.x,
4357; EG-NEXT:     BFE_INT T65.Y, PS, 0.0, literal.x,
4358; EG-NEXT:     LSHR T1.Z, T41.X, literal.x,
4359; EG-NEXT:     BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4360; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4361; EG-NEXT:    16(2.242078e-44), 224(3.138909e-43)
4362; EG-NEXT:     LSHR T41.X, PS, literal.x,
4363; EG-NEXT:     BFE_INT T47.Y, PV.Z, 0.0, literal.y,
4364; EG-NEXT:     LSHR T0.Z, T41.Z, literal.y,
4365; EG-NEXT:     BFE_INT T66.W, T0.Y, 0.0, literal.y,
4366; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4367; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4368; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4369; EG-NEXT:     LSHR T48.X, PS, literal.x,
4370; EG-NEXT:     BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
4371; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4372  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
4373  %ext = sext <64 x i16> %load to <64 x i32>
4374  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
4375  ret void
4376}
4377
; Test kernel: loads one i16 through the addrspace(4) pointer %in, zero-extends
; it to i64, and stores the result through the addrspace(1) pointer %out.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the update script instead of
; editing them by hand.
4378define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4379; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
4380; GCN-NOHSA-SI:       ; %bb.0:
4381; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4382; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4383; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4384; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4385; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4386; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4387; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4388; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4389; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4390; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4391; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4392; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4393; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4394; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4395; GCN-NOHSA-SI-NEXT:    s_endpgm
4396;
4397; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
4398; GCN-HSA:       ; %bb.0:
4399; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4400; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4401; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4402; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4403; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
4404; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4405; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4406; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4407; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4408; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4409; GCN-HSA-NEXT:    s_endpgm
4410;
4411; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
4412; GCN-NOHSA-VI:       ; %bb.0:
4413; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4414; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4415; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4416; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4417; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4418; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4419; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4420; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4421; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4422; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4423; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4424; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4425; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4426; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4427; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4428; GCN-NOHSA-VI-NEXT:    s_endpgm
4429;
4430; EG-LABEL: constant_zextload_i16_to_i64:
4431; EG:       ; %bb.0:
4432; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4433; EG-NEXT:    TEX 0 @6
4434; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4435; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4436; EG-NEXT:    CF_END
4437; EG-NEXT:    PAD
4438; EG-NEXT:    Fetch clause starting at 6:
4439; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4440; EG-NEXT:    ALU clause starting at 8:
4441; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4442; EG-NEXT:    ALU clause starting at 9:
4443; EG-NEXT:     MOV * T0.Y, 0.0,
4444; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4445; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4446  %a = load i16, i16 addrspace(4)* %in
4447  %ext = zext i16 %a to i64
4448  store i64 %ext, i64 addrspace(1)* %out
4449  ret void
4450}
4451
4452; FIXME: Need to optimize this sequence to avoid the extra bfe (v_bfe_i32 in the tonga checks below):
4453;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
4454;          t31: i64 = any_extend t28
4455;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
4456; TODO: These could be expanded earlier using ASHR 15
; Test kernel: loads one i16 through the addrspace(4) pointer %in, sign-extends
; it to i64, and stores the result through the addrspace(1) pointer %out.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the update script instead of
; editing them by hand.
4457define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4458; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
4459; GCN-NOHSA-SI:       ; %bb.0:
4460; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4461; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4462; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4463; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4464; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4465; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4466; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4467; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4468; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4469; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4470; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4471; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4472; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4473; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4474; GCN-NOHSA-SI-NEXT:    s_endpgm
4475;
4476; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
4477; GCN-HSA:       ; %bb.0:
4478; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4479; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4480; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4481; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4482; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
4483; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4484; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4485; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4486; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4487; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4488; GCN-HSA-NEXT:    s_endpgm
4489;
4490; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
4491; GCN-NOHSA-VI:       ; %bb.0:
4492; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4493; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4494; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4495; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4496; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4497; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4498; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4499; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4500; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4501; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4502; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4503; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4504; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4505; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4506; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4507; GCN-NOHSA-VI-NEXT:    s_endpgm
4508;
4509; EG-LABEL: constant_sextload_i16_to_i64:
4510; EG:       ; %bb.0:
4511; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4512; EG-NEXT:    TEX 0 @6
4513; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4514; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4515; EG-NEXT:    CF_END
4516; EG-NEXT:    PAD
4517; EG-NEXT:    Fetch clause starting at 6:
4518; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4519; EG-NEXT:    ALU clause starting at 8:
4520; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4521; EG-NEXT:    ALU clause starting at 9:
4522; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4523; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4524; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4525; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4526; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4527  %a = load i16, i16 addrspace(4)* %in
4528  %ext = sext i16 %a to i64
4529  store i64 %ext, i64 addrspace(1)* %out
4530  ret void
4531}
4532
; Test kernel: loads a <1 x i16> vector through the addrspace(4) pointer %in,
; zero-extends it to <1 x i64>, and stores the result through the addrspace(1)
; pointer %out.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the update script instead of
; editing them by hand.
4533define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4534; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
4535; GCN-NOHSA-SI:       ; %bb.0:
4536; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4537; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4538; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4539; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4540; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4541; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4542; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4543; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4544; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4545; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4546; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4548; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4549; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4550; GCN-NOHSA-SI-NEXT:    s_endpgm
4551;
4552; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
4553; GCN-HSA:       ; %bb.0:
4554; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4555; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4556; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4557; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4558; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
4559; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4560; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4561; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4562; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4563; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4564; GCN-HSA-NEXT:    s_endpgm
4565;
4566; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
4567; GCN-NOHSA-VI:       ; %bb.0:
4568; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4569; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4570; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4571; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4572; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4573; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4574; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4575; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4576; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4577; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4578; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4579; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4580; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4581; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4582; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4583; GCN-NOHSA-VI-NEXT:    s_endpgm
4584;
4585; EG-LABEL: constant_zextload_v1i16_to_v1i64:
4586; EG:       ; %bb.0:
4587; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4588; EG-NEXT:    TEX 0 @6
4589; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4590; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4591; EG-NEXT:    CF_END
4592; EG-NEXT:    PAD
4593; EG-NEXT:    Fetch clause starting at 6:
4594; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4595; EG-NEXT:    ALU clause starting at 8:
4596; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4597; EG-NEXT:    ALU clause starting at 9:
4598; EG-NEXT:     MOV * T0.Y, 0.0,
4599; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4600; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4601  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4602  %ext = zext <1 x i16> %load to <1 x i64>
4603  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4604  ret void
4605}
4606
; Codegen test: load a <1 x i16> through the addrspace(4) pointer %in,
; sign-extend it to <1 x i64>, and store the result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line.
4607define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4608; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
4609; GCN-NOHSA-SI:       ; %bb.0:
4610; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4611; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4612; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4613; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4615; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4616; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4617; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4618; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4619; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4620; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4621; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4622; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4623; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4624; GCN-NOHSA-SI-NEXT:    s_endpgm
4625;
4626; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64:
4627; GCN-HSA:       ; %bb.0:
4628; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4629; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4630; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4631; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4632; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
4633; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4634; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4635; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4636; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4637; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4638; GCN-HSA-NEXT:    s_endpgm
4639;
4640; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64:
4641; GCN-NOHSA-VI:       ; %bb.0:
4642; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4643; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4644; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4645; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4646; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4647; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4648; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4649; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4650; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4651; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4652; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4653; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4654; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4655; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4656; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4657; GCN-NOHSA-VI-NEXT:    s_endpgm
4658;
4659; EG-LABEL: constant_sextload_v1i16_to_v1i64:
4660; EG:       ; %bb.0:
4661; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4662; EG-NEXT:    TEX 0 @6
4663; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4664; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4665; EG-NEXT:    CF_END
4666; EG-NEXT:    PAD
4667; EG-NEXT:    Fetch clause starting at 6:
4668; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4669; EG-NEXT:    ALU clause starting at 8:
4670; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4671; EG-NEXT:    ALU clause starting at 9:
4672; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4673; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4674; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4675; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4676; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; IR under test: one vector load, a sext of the single i16 lane to i64, and
; one vector store.
4677  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4678  %ext = sext <1 x i16> %load to <1 x i64>
4679  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4680  ret void
4681}
4682
; Codegen test: load a <2 x i16> through the addrspace(4) pointer %in,
; zero-extend each lane to i64, and store the <2 x i64> result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line.
4683define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4684; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
4685; GCN-NOHSA-SI:       ; %bb.0:
4686; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4687; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4688; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4689; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4690; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4691; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4692; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4693; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
4694; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4695; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4696; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
4697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4698; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4699; GCN-NOHSA-SI-NEXT:    s_endpgm
4700;
4701; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64:
4702; GCN-HSA:       ; %bb.0:
4703; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4704; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4705; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4706; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4707; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
4708; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4709; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4710; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4711; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
4712; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
4713; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s1
4714; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4715; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4716; GCN-HSA-NEXT:    s_endpgm
4717;
4718; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64:
4719; GCN-NOHSA-VI:       ; %bb.0:
4720; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4721; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4722; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4723; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4724; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4725; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4726; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
4727; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4728; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4729; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4730; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s2, 0xffff
4731; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
4732; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
4733; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
4734; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4735; GCN-NOHSA-VI-NEXT:    s_endpgm
4736;
4737; EG-LABEL: constant_zextload_v2i16_to_v2i64:
4738; EG:       ; %bb.0:
4739; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4740; EG-NEXT:    TEX 0 @6
4741; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
4742; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4743; EG-NEXT:    CF_END
4744; EG-NEXT:    PAD
4745; EG-NEXT:    Fetch clause starting at 6:
4746; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4747; EG-NEXT:    ALU clause starting at 8:
4748; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4749; EG-NEXT:    ALU clause starting at 9:
4750; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
4751; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4752; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
4753; EG-NEXT:     MOV T4.Y, 0.0,
4754; EG-NEXT:     MOV T4.W, 0.0,
4755; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4756; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
; IR under test: one vector load, a zext of both i16 lanes to i64, and one
; vector store.
4757  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4758  %ext = zext <2 x i16> %load to <2 x i64>
4759  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4760  ret void
4761}
4762
; Codegen test: load a <2 x i16> through the addrspace(4) pointer %in,
; sign-extend each lane to i64, and store the <2 x i64> result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line.
4763define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4764; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
4765; GCN-NOHSA-SI:       ; %bb.0:
4766; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4767; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4768; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4769; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4770; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4771; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4772; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[2:3], 0x100000
4773; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4774; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4775; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4776; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4777; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4778; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4779; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4780; GCN-NOHSA-SI-NEXT:    s_endpgm
4781;
4782; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64:
4783; GCN-HSA:       ; %bb.0:
4784; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4785; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4786; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
4787; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4788; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4789; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4790; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
4791; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
4792; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
4793; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4794; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4795; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4796; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4797; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4798; GCN-HSA-NEXT:    s_endpgm
4799;
4800; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64:
4801; GCN-NOHSA-VI:       ; %bb.0:
4802; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4803; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4804; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
4805; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4806; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4807; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4808; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
4809; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4810; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4811; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4812; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
4813; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
4814; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
4815; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4816; GCN-NOHSA-VI-NEXT:    s_endpgm
4817;
4818; EG-LABEL: constant_sextload_v2i16_to_v2i64:
4819; EG:       ; %bb.0:
4820; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4821; EG-NEXT:    TEX 0 @6
4822; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
4823; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4824; EG-NEXT:    CF_END
4825; EG-NEXT:    PAD
4826; EG-NEXT:    Fetch clause starting at 6:
4827; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4828; EG-NEXT:    ALU clause starting at 8:
4829; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4830; EG-NEXT:    ALU clause starting at 9:
4831; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
4832; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4833; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
4834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4835; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
4836; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4837; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4838; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
4839; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; IR under test: one vector load, a sext of both i16 lanes to i64, and one
; vector store.
4840  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4841  %ext = sext <2 x i16> %load to <2 x i64>
4842  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4843  ret void
4844}
4845
; Codegen test: load a <4 x i16> through the addrspace(4) pointer %in,
; zero-extend each lane to i64, and store the <4 x i64> result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line; in each
; group the expected output writes the result with two stores, one of them
; 16 bytes past %out.
4846define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4847; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
4848; GCN-NOHSA-SI:       ; %bb.0:
4849; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4850; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4851; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4852; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4853; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4854; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4855; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, 0xffff
4856; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4857; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4858; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4859; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4860; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s0, s3, 16
4861; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s1, s2, 16
4862; GCN-NOHSA-SI-NEXT:    s_and_b32 s3, s3, s8
4863; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s2, s8
4864; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
4865; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s0
4866; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4867; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4868; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
4869; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
4870; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4871; GCN-NOHSA-SI-NEXT:    s_endpgm
4872;
4873; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64:
4874; GCN-HSA:       ; %bb.0:
4875; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4876; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
4877; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4878; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4879; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4880; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4881; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4882; GCN-HSA-NEXT:    s_lshr_b32 s5, s3, 16
4883; GCN-HSA-NEXT:    s_lshr_b32 s6, s2, 16
4884; GCN-HSA-NEXT:    s_and_b32 s7, s2, s4
4885; GCN-HSA-NEXT:    s_and_b32 s2, s3, s4
4886; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4887; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
4888; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4889; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4890; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
4891; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4892; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4893; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4894; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
4895; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
4896; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4897; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4898; GCN-HSA-NEXT:    s_endpgm
4899;
4900; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64:
4901; GCN-NOHSA-VI:       ; %bb.0:
4902; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4903; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, 0xffff
4904; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4905; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4906; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4907; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4908; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4909; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4910; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4911; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4912; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4913; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s2, s8
4914; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
4915; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s3, s8
4916; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s3, s3, 16
4917; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
4918; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
4919; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4920; GCN-NOHSA-VI-NEXT:    s_nop 0
4921; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
4922; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
4923; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4924; GCN-NOHSA-VI-NEXT:    s_endpgm
4925;
4926; EG-LABEL: constant_zextload_v4i16_to_v4i64:
4927; EG:       ; %bb.0:
4928; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4929; EG-NEXT:    TEX 0 @6
4930; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
4931; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
4932; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
4933; EG-NEXT:    CF_END
4934; EG-NEXT:    Fetch clause starting at 6:
4935; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
4936; EG-NEXT:    ALU clause starting at 8:
4937; EG-NEXT:     MOV * T5.X, KC0[2].Z,
4938; EG-NEXT:    ALU clause starting at 9:
4939; EG-NEXT:     MOV T2.X, T5.X,
4940; EG-NEXT:     MOV * T3.X, T5.Y,
4941; EG-NEXT:     MOV T0.Y, PV.X,
4942; EG-NEXT:     MOV * T0.Z, PS,
4943; EG-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
4944; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4945; EG-NEXT:     AND_INT T5.X, T0.Z, literal.x,
4946; EG-NEXT:     MOV T5.Y, 0.0,
4947; EG-NEXT:     LSHR T6.Z, T0.Y, literal.y,
4948; EG-NEXT:     AND_INT * T6.X, T0.Y, literal.x,
4949; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4950; EG-NEXT:     MOV T6.Y, 0.0,
4951; EG-NEXT:     MOV T5.W, 0.0,
4952; EG-NEXT:     MOV * T6.W, 0.0,
4953; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
4954; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4955; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4956; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
4957; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, a zext of all four i16 lanes to i64, and
; one vector store.
4958  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
4959  %ext = zext <4 x i16> %load to <4 x i64>
4960  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
4961  ret void
4962}
4963
; Codegen test: load a <4 x i16> through the addrspace(4) pointer %in,
; sign-extend each lane to i64, and store the <4 x i64> result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line; in each
; group the expected output writes the result with two stores, one of them
; 16 bytes past %out.
4964define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4965; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
4966; GCN-NOHSA-SI:       ; %bb.0:
4967; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4968; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4969; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
4970; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4971; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4972; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4973; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s5
4974; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s4, 16
4975; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[4:5], 0x100000
4976; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
4977; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
4978; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
4979; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4980; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4981; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4982; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4983; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4984; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4985; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
4986; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
4987; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
4988; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
4989; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4990; GCN-NOHSA-SI-NEXT:    s_endpgm
4991;
4992; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
4993; GCN-HSA:       ; %bb.0:
4994; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4995; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4996; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4997; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4998; GCN-HSA-NEXT:    s_mov_b32 s4, s3
4999; GCN-HSA-NEXT:    s_lshr_b32 s6, s2, 16
5000; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[2:3], 0x100000
5001; GCN-HSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 48
5002; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5003; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5004; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5005; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5006; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5007; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5008; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5009; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5010; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
5011; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5012; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5013; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5014; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5015; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
5016; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5017; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5018; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5019; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5020; GCN-HSA-NEXT:    s_endpgm
5021;
5022; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
5023; GCN-NOHSA-VI:       ; %bb.0:
5024; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5025; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5026; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
5027; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5028; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5029; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5030; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s5
5031; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s5, 16
5032; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
5033; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5034; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5035; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5036; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5038; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
5039; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
5041; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5042; GCN-NOHSA-VI-NEXT:    s_nop 0
5043; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
5044; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
5045; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5046; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5047; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5048; GCN-NOHSA-VI-NEXT:    s_endpgm
5049;
5050; EG-LABEL: constant_sextload_v4i16_to_v4i64:
5051; EG:       ; %bb.0:
5052; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5053; EG-NEXT:    TEX 0 @6
5054; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5055; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
5056; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5057; EG-NEXT:    CF_END
5058; EG-NEXT:    Fetch clause starting at 6:
5059; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5060; EG-NEXT:    ALU clause starting at 8:
5061; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5062; EG-NEXT:    ALU clause starting at 9:
5063; EG-NEXT:     MOV T2.X, T5.X,
5064; EG-NEXT:     MOV * T3.X, T5.Y,
5065; EG-NEXT:     MOV T0.Y, PS,
5066; EG-NEXT:     MOV * T0.Z, PV.X,
5067; EG-NEXT:     ASHR * T5.W, PV.Z, literal.x,
5068; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5069; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5070; EG-NEXT:     ASHR T5.Z, T0.Z, literal.y,
5071; EG-NEXT:     ASHR * T7.W, T0.Y, literal.z,
5072; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5073; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5074; EG-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
5075; EG-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
5076; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5077; EG-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
5078; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
5079; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5080; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5081; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
5082; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
5083; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test: one vector load, a sext of all four i16 lanes to i64, and
; one vector store.
5084  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
5085  %ext = sext <4 x i16> %load to <4 x i64>
5086  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
5087  ret void
5088}
5089
; Codegen test: load an <8 x i16> through the addrspace(4) pointer %in,
; zero-extend each lane to i64, and store the <8 x i64> result through the
; addrspace(1) pointer %out.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per RUN line; in each
; group the expected output writes the result with four stores, at byte
; offsets 0, 16, 32 and 48 from %out.
5090define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5091; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
5092; GCN-NOHSA-SI:       ; %bb.0:
5093; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
5094; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5095; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5096; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[6:7], 0x0
5097; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5098; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5099; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, 0xffff
5100; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5101; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
5102; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
5103; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5104; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s9, 16
5105; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s5, s11, 16
5106; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s10, 16
5107; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s8, 16
5108; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s6
5109; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s6
5110; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s6
5111; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s9, s6
5112; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
5113; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
5114; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5115; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5116; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5117; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5118; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5119; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5120; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5121; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
5122; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5123; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5124; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5125; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5126; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5127; GCN-NOHSA-SI-NEXT:    s_endpgm
5128;
5129; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
5130; GCN-HSA:       ; %bb.0:
5131; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5132; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5133; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5134; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5135; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5136; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
5137; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5138; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
5139; GCN-HSA-NEXT:    s_lshr_b32 s3, s7, 16
5140; GCN-HSA-NEXT:    s_lshr_b32 s9, s6, 16
5141; GCN-HSA-NEXT:    s_lshr_b32 s10, s4, 16
5142; GCN-HSA-NEXT:    s_and_b32 s4, s4, s2
5143; GCN-HSA-NEXT:    s_and_b32 s6, s6, s2
5144; GCN-HSA-NEXT:    s_and_b32 s5, s5, s2
5145; GCN-HSA-NEXT:    s_and_b32 s2, s7, s2
5146; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5147; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5148; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
5149; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5150; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5151; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5152; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5153; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5154; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5155; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5156; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5157; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5158; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5159; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
5160; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5161; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5162; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5163; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5164; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
5165; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5166; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5167; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5168; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5169; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5170; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5171; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5172; GCN-HSA-NEXT:    s_endpgm
5173;
5174; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
5175; GCN-NOHSA-VI:       ; %bb.0:
5176; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5177; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5178; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5179; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5180; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5181; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5182; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[6:7], 0x0
5183; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
5184; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5185; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5186; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5187; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s8, s6
5188; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s8, 16
5189; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s9, s6
5190; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s9, 16
5191; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s10, s6
5192; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s11, s6
5193; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
5194; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
5195; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
5196; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
5197; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5198; GCN-NOHSA-VI-NEXT:    s_nop 0
5199; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s9
5200; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5201; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5202; GCN-NOHSA-VI-NEXT:    s_nop 0
5203; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s7
5204; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
5205; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5206; GCN-NOHSA-VI-NEXT:    s_nop 0
5207; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
5208; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5209; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5210; GCN-NOHSA-VI-NEXT:    s_endpgm
5211;
5212; EG-LABEL: constant_zextload_v8i16_to_v8i64:
5213; EG:       ; %bb.0:
5214; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5215; EG-NEXT:    TEX 0 @8
5216; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
5217; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
5218; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
5219; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
5220; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
5221; EG-NEXT:    CF_END
5222; EG-NEXT:    Fetch clause starting at 8:
5223; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5224; EG-NEXT:    ALU clause starting at 10:
5225; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5226; EG-NEXT:    ALU clause starting at 11:
5227; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
5228; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5229; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
5230; EG-NEXT:     MOV T8.Y, 0.0,
5231; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
5232; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
5233; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5234; EG-NEXT:     MOV T9.Y, 0.0,
5235; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
5236; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5237; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
5238; EG-NEXT:     MOV T10.Y, 0.0,
5239; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
5240; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
5241; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5242; EG-NEXT:     MOV T7.Y, 0.0,
5243; EG-NEXT:     MOV T8.W, 0.0,
5244; EG-NEXT:     MOV * T9.W, 0.0,
5245; EG-NEXT:     MOV T10.W, 0.0,
5246; EG-NEXT:     MOV * T7.W, 0.0,
5247; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
5248; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5249; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5250; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
5251; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5252; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5253; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
5254; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5255; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5256; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
5257; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, a zext of all eight i16 lanes to i64, and
; one vector store.
5258  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5259  %ext = zext <8 x i16> %load to <8 x i64>
5260  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5261  ret void
5262}
5263
; Test case: load <8 x i16> through an addrspace(4) pointer, sign-extend every
; lane to i64, and store the <8 x i64> result through an addrspace(1) pointer.
; The result is 64 bytes; for each of the four check prefixes below the expected
; output writes it as four 16-byte stores (byte offsets 0, 16, 32 and 48).
; The GCN expectations perform the extension with scalar instructions
; (s_bfe_i64, s_ashr_i64, s_lshr_b32) and then copy into v registers for the
; stores; the EG expectation uses BFE_INT and ASHR.
; The assertions are autogenerated (see the first line of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
5264define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5265; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
5266; GCN-NOHSA-SI:       ; %bb.0:
5267; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5268; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5269; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5270; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5271; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5272; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5273; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s7
5274; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s5
5275; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
5276; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s4, 16
5277; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[4:5], 0x100000
5278; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5279; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5280; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5281; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5282; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5283; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5284; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5285; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5286; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
5287; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5288; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5289; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5290; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5291; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5292; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
5293; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5294; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5295; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5296; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5297; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5298; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5299; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
5300; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s17
5301; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5302; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
5303; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5304; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s14
5305; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s15
5306; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
5307; GCN-NOHSA-SI-NEXT:    s_endpgm
5308;
5309; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
5310; GCN-HSA:       ; %bb.0:
5311; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5312; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5313; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5314; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5315; GCN-HSA-NEXT:    s_mov_b32 s2, s7
5316; GCN-HSA-NEXT:    s_mov_b32 s8, s5
5317; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
5318; GCN-HSA-NEXT:    s_lshr_b32 s12, s4, 16
5319; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[4:5], 0x100000
5320; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5321; GCN-HSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5322; GCN-HSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5323; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5324; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5325; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5326; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5327; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5328; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5329; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5330; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5331; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5332; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5333; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5334; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5335; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5336; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5337; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5338; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5339; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5340; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5341; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5342; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
5343; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
5344; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
5345; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5346; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5347; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5348; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
5349; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
5350; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5351; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5352; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5353; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5354; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5355; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
5356; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
5357; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
5358; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
5359; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5360; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5361; GCN-HSA-NEXT:    s_endpgm
5362;
5363; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
5364; GCN-NOHSA-VI:       ; %bb.0:
5365; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5366; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5367; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5368; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5369; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5370; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5371; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[6:7], 0x100000
5372; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5373; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5374; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5375; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5376; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5377; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5378; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s5
5379; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
5380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
5382; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5383; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
5384; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[4:5], 0x100000
5385; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5386; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5387; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5388; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5389; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5390; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
5392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
5393; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
5394; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5395; GCN-NOHSA-VI-NEXT:    s_nop 0
5396; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
5397; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
5398; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
5399; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
5400; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5401; GCN-NOHSA-VI-NEXT:    s_nop 0
5402; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5403; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
5404; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5405; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5406; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5407; GCN-NOHSA-VI-NEXT:    s_endpgm
5408;
5409; EG-LABEL: constant_sextload_v8i16_to_v8i64:
5410; EG:       ; %bb.0:
5411; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5412; EG-NEXT:    TEX 0 @8
5413; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
5414; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
5415; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
5416; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
5417; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
5418; EG-NEXT:    CF_END
5419; EG-NEXT:    Fetch clause starting at 8:
5420; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5421; EG-NEXT:    ALU clause starting at 10:
5422; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5423; EG-NEXT:    ALU clause starting at 11:
5424; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
5425; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5426; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5427; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
5428; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
5429; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
5430; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5431; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5432; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5433; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
5434; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
5435; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5436; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5437; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
5438; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
5439; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
5440; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5441; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
5442; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
5443; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
5444; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
5445; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5446; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
5447; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
5448; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
5449; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5450; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
5451; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
5452; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
5453; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5454; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
5455; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
5456; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
5457; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
  ; IR under test: vector load, per-lane sext from i16 to i64, vector store.
5458  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5459  %ext = sext <8 x i16> %load to <8 x i64>
5460  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5461  ret void
5462}
5463
; Test case: load <16 x i16> through an addrspace(4) pointer, zero-extend every
; lane to i64, and store the <16 x i64> result through an addrspace(1) pointer.
; The result is 128 bytes; for each of the four check prefixes below the
; expected output writes it as eight 16-byte stores (byte offsets 0 through 112
; in steps of 16).
; Because the extension is unsigned, the expected code only masks with 0xffff
; and shifts right by 16; the upper 32 bits of each element come from a
; constant zero (v1/v3 in the GCN expectations, MOV ..., 0.0 into the .Y/.W
; channels in the EG expectation).
; The assertions are autogenerated (see the first line of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
5464define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5465; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
5466; GCN-NOHSA-SI:       ; %bb.0:
5467; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x9
5468; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5469; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5470; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[14:15], 0x0
5471; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5472; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5473; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, 0xffff
5474; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5475; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s12
5476; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s13
5477; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5478; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s5, 16
5479; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s7, 16
5480; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s11, 16
5481; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s9, 16
5482; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s8, 16
5483; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s10, 16
5484; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s6, 16
5485; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s4, 16
5486; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, s14
5487; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, s14
5488; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, s14
5489; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, s14
5490; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s14
5491; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s14
5492; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s14
5493; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s14
5494; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
5495; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
5496; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5497; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5498; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
5499; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
5500; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5501; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5502; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
5503; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
5504; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5505; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5506; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5507; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5508; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5509; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5510; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5511; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
5512; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5513; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5514; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5515; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
5516; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5517; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5518; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5519; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
5520; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5521; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5522; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
5523; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
5524; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5525; GCN-NOHSA-SI-NEXT:    s_endpgm
5526;
5527; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64:
5528; GCN-HSA:       ; %bb.0:
5529; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5530; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5531; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5532; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5533; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5534; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
5535; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5536; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
5537; GCN-HSA-NEXT:    s_lshr_b32 s13, s7, 16
5538; GCN-HSA-NEXT:    s_lshr_b32 s14, s11, 16
5539; GCN-HSA-NEXT:    s_lshr_b32 s3, s9, 16
5540; GCN-HSA-NEXT:    s_lshr_b32 s15, s8, 16
5541; GCN-HSA-NEXT:    s_lshr_b32 s16, s10, 16
5542; GCN-HSA-NEXT:    s_lshr_b32 s17, s6, 16
5543; GCN-HSA-NEXT:    s_lshr_b32 s18, s4, 16
5544; GCN-HSA-NEXT:    s_and_b32 s4, s4, s2
5545; GCN-HSA-NEXT:    s_and_b32 s6, s6, s2
5546; GCN-HSA-NEXT:    s_and_b32 s10, s10, s2
5547; GCN-HSA-NEXT:    s_and_b32 s8, s8, s2
5548; GCN-HSA-NEXT:    s_and_b32 s5, s5, s2
5549; GCN-HSA-NEXT:    s_and_b32 s7, s7, s2
5550; GCN-HSA-NEXT:    s_and_b32 s11, s11, s2
5551; GCN-HSA-NEXT:    s_and_b32 s2, s9, s2
5552; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5553; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
5554; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
5555; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5556; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5557; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5558; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
5559; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5560; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5561; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5562; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5563; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5564; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
5565; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5566; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5567; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5568; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5569; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5570; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5571; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
5572; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
5573; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5574; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5575; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5576; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5577; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
5578; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5579; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
5580; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5581; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5582; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5583; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5584; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
5585; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5586; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
5587; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5588; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5589; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5590; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5591; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5592; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
5593; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
5594; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5595; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5596; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5597; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5598; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s17
5599; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5600; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5601; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5602; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5603; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
5604; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5605; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5606; GCN-HSA-NEXT:    s_endpgm
5607;
5608; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64:
5609; GCN-NOHSA-VI:       ; %bb.0:
5610; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x24
5611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5612; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5613; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5614; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5615; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5616; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[14:15], 0x0
5617; GCN-NOHSA-VI-NEXT:    s_mov_b32 s14, 0xffff
5618; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s12
5619; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s13
5620; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5621; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s4, s14
5622; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s5, s14
5623; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s6, s14
5624; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s7, s14
5625; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s8, s14
5626; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s9, s14
5627; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s10, s14
5628; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s11, s14
5629; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
5630; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
5631; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5632; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
5633; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
5634; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5635; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
5636; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s19
5637; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5638; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5639; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
5640; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5641; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
5642; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5643; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5644; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s17
5645; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
5646; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5647; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
5648; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
5649; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
5650; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5651; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5652; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s15
5653; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5654; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5655; GCN-NOHSA-VI-NEXT:    s_nop 0
5656; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s13
5657; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5658; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5659; GCN-NOHSA-VI-NEXT:    s_nop 0
5660; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
5661; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5662; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5663; GCN-NOHSA-VI-NEXT:    s_endpgm
5664;
5665; EG-LABEL: constant_zextload_v16i16_to_v16i64:
5666; EG:       ; %bb.0:
5667; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
5668; EG-NEXT:    TEX 1 @12
5669; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
5670; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
5671; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
5672; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
5673; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
5674; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
5675; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
5676; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
5677; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
5678; EG-NEXT:    CF_END
5679; EG-NEXT:    Fetch clause starting at 12:
5680; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
5681; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
5682; EG-NEXT:    ALU clause starting at 16:
5683; EG-NEXT:     MOV * T11.X, KC0[2].Z,
5684; EG-NEXT:    ALU clause starting at 17:
5685; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
5686; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5687; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
5688; EG-NEXT:     MOV T13.Y, 0.0,
5689; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
5690; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
5691; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5692; EG-NEXT:     MOV T14.Y, 0.0,
5693; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
5694; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5695; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
5696; EG-NEXT:     MOV T15.Y, 0.0,
5697; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
5698; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
5699; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5700; EG-NEXT:     MOV T12.Y, 0.0,
5701; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
5702; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5703; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
5704; EG-NEXT:     MOV T16.Y, 0.0,
5705; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
5706; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
5707; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5708; EG-NEXT:     MOV T17.Y, 0.0,
5709; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
5710; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5711; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
5712; EG-NEXT:     MOV T18.Y, 0.0,
5713; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
5714; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
5715; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5716; EG-NEXT:     MOV T11.Y, 0.0,
5717; EG-NEXT:     MOV T13.W, 0.0,
5718; EG-NEXT:     MOV * T14.W, 0.0,
5719; EG-NEXT:     MOV T15.W, 0.0,
5720; EG-NEXT:     MOV * T12.W, 0.0,
5721; EG-NEXT:     MOV T16.W, 0.0,
5722; EG-NEXT:     MOV * T17.W, 0.0,
5723; EG-NEXT:     MOV T18.W, 0.0,
5724; EG-NEXT:     MOV * T11.W, 0.0,
5725; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
5726; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5727; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5728; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
5729; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5730; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5731; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
5732; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5733; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5734; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
5735; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5736; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5737; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
5738; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5739; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5740; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
5741; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5742; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
5743; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
5744; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5745; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5746; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
5747; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: vector load, per-lane zext from i16 to i64, vector store.
5748  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
5749  %ext = zext <16 x i16> %load to <16 x i64>
5750  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
5751  ret void
5752}
5753
5754define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5755; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
5756; GCN-NOHSA-SI:       ; %bb.0:
5757; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5758; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5759; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5760; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5761; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5762; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5763; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, s11
5764; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, s9
5765; GCN-NOHSA-SI-NEXT:    s_mov_b32 s16, s7
5766; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s5
5767; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s10, 16
5768; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s8, 16
5769; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s6, 16
5770; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s4, 16
5771; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
5772; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
5773; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[34:35], s[8:9], 0x100000
5774; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[10:11], 0x100000
5775; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5776; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5777; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5778; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5779; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5780; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5781; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5782; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5783; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
5784; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
5785; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
5786; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5787; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
5788; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
5789; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
5790; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
5791; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5792; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5793; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
5794; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
5795; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
5796; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
5797; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5798; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5799; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
5800; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
5801; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5802; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5803; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5804; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5805; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5806; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5807; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5808; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5809; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5810; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5811; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
5812; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
5813; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s34
5814; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s35
5815; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s30
5816; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s31
5817; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
5818; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
5819; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
5820; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
5821; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5822; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
5823; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
5824; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
5825; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s24
5826; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
5827; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
5828; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s26
5829; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s27
5830; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
5831; GCN-NOHSA-SI-NEXT:    s_endpgm
5832;
5833; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
5834; GCN-HSA:       ; %bb.0:
5835; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5836; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5837; GCN-HSA-NEXT:    s_load_dwordx8 s[8:15], s[2:3], 0x0
5838; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5839; GCN-HSA-NEXT:    s_mov_b32 s6, s15
5840; GCN-HSA-NEXT:    s_mov_b32 s16, s13
5841; GCN-HSA-NEXT:    s_mov_b32 s18, s11
5842; GCN-HSA-NEXT:    s_mov_b32 s20, s9
5843; GCN-HSA-NEXT:    s_lshr_b32 s22, s14, 16
5844; GCN-HSA-NEXT:    s_lshr_b32 s24, s12, 16
5845; GCN-HSA-NEXT:    s_lshr_b32 s26, s10, 16
5846; GCN-HSA-NEXT:    s_lshr_b32 s28, s8, 16
5847; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
5848; GCN-HSA-NEXT:    s_ashr_i64 s[14:15], s[14:15], 48
5849; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5850; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[8:9], 0x100000
5851; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[10:11], 0x100000
5852; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
5853; GCN-HSA-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5854; GCN-HSA-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5855; GCN-HSA-NEXT:    s_ashr_i64 s[12:13], s[12:13], 48
5856; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5857; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s7
5858; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5859; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
5860; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x100000
5861; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[26:27], 0x100000
5862; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
5863; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
5864; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5865; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5866; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5867; GCN-HSA-NEXT:    s_add_u32 s26, s0, 0x70
5868; GCN-HSA-NEXT:    s_addc_u32 s27, s1, 0
5869; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s26
5870; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s12
5871; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0x50
5872; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s27
5873; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s13
5874; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
5875; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s12
5876; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
5877; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
5878; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s13
5879; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
5880; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5881; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5882; GCN-HSA-NEXT:    s_add_u32 s10, s0, 48
5883; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5884; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
5885; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
5886; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
5887; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
5888; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
5889; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5890; GCN-HSA-NEXT:    s_nop 0
5891; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
5892; GCN-HSA-NEXT:    s_add_u32 s8, s0, 16
5893; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
5894; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5895; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5896; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
5897; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
5898; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5899; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0x60
5900; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5901; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5902; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5903; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
5904; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s35
5905; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
5906; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s23
5907; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5908; GCN-HSA-NEXT:    s_add_u32 s8, s0, 64
5909; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5910; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5911; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5912; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
5913; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
5914; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
5915; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s25
5916; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5917; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5918; GCN-HSA-NEXT:    s_nop 0
5919; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5920; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
5921; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
5922; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5923; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5924; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5925; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
5926; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5927; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5928; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5929; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5930; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5931; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5932; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5933; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5934; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5935; GCN-HSA-NEXT:    s_endpgm
5936;
5937; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
5938; GCN-NOHSA-VI:       ; %bb.0:
5939; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
5940; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5941; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
5942; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
5943; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
5944; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5945; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
5946; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5947; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[6:7], 0x100000
5948; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5949; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[4:5], 0x100000
5950; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5951; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[6:7], 0x100000
5952; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5953; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[4:5], 0x100000
5954; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s5
5955; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5956; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[2:3], 0x100000
5957; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
5958; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
5959; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s5, 16
5960; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
5961; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
5962; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5963; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
5964; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
5965; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, s3
5966; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5967; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
5968; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[2:3], 0x100000
5969; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
5970; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
5971; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
5972; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
5973; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s3, 16
5974; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
5975; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5976; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
5977; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
5978; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5980; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
5981; GCN-NOHSA-VI-NEXT:    s_mov_b32 s14, s1
5982; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
5983; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
5984; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
5985; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
5986; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s1, 16
5987; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
5988; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[0:1], 0x100000
5989; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
5990; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
5991; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
5992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
5993; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s0, 16
5994; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5995; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5996; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
5997; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
5998; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5999; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
6000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
6001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
6002; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
6003; GCN-NOHSA-VI-NEXT:    s_nop 0
6004; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
6005; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
6006; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
6007; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
6008; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
6009; GCN-NOHSA-VI-NEXT:    s_nop 0
6010; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
6011; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
6012; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
6013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s1
6014; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
6015; GCN-NOHSA-VI-NEXT:    s_endpgm
6016;
6017; EG-LABEL: constant_sextload_v16i16_to_v16i64:
6018; EG:       ; %bb.0:
6019; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
6020; EG-NEXT:    TEX 1 @12
6021; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
6022; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
6023; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
6024; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
6025; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
6026; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
6027; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
6028; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
6029; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
6030; EG-NEXT:    CF_END
6031; EG-NEXT:    Fetch clause starting at 12:
6032; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
6033; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
6034; EG-NEXT:    ALU clause starting at 16:
6035; EG-NEXT:     MOV * T11.X, KC0[2].Z,
6036; EG-NEXT:    ALU clause starting at 17:
6037; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
6038; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6039; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6040; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
6041; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6042; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6043; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
6044; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6045; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6046; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
6047; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6048; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6049; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
6050; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6051; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6052; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
6053; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
6054; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
6055; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6056; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6057; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6058; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
6059; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
6060; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6061; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6062; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
6063; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
6064; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
6065; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6066; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
6067; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
6068; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
6069; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
6070; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6071; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
6072; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
6073; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
6074; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
6075; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6076; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
6077; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
6078; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
6079; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
6080; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6081; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
6082; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
6083; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
6084; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
6085; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6086; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
6087; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
6088; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
6089; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
6090; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6091; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
6092; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
6093; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
6094; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6095; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
6096; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
6097; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6098; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6099; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
6100; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6101; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
6102; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6103  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
6104  %ext = sext <16 x i16> %load to <16 x i64>
6105  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
6106  ret void
6107}
6108
6109define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6110; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
6111; GCN-NOHSA-SI:       ; %bb.0:
6112; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
6113; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6114; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6115; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, 0xffff
6116; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6117; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s1, 16
6118; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s3, 16
6119; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s5, 16
6120; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s7, 16
6121; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s9, 16
6122; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s11, 16
6123; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s13, 16
6124; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s15, 16
6125; GCN-NOHSA-SI-NEXT:    s_and_b32 s27, s0, s18
6126; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s2, s18
6127; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s4, s18
6128; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s6, s18
6129; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s8, s18
6130; GCN-NOHSA-SI-NEXT:    s_and_b32 s33, s10, s18
6131; GCN-NOHSA-SI-NEXT:    s_and_b32 s34, s12, s18
6132; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s14, s18
6133; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s1, s18
6134; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s3, s18
6135; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, s18
6136; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, s18
6137; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, s18
6138; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, s18
6139; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, s18
6140; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, s18
6141; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s14, 16
6142; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s12, 16
6143; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s10, 16
6144; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s8, 16
6145; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s6, 16
6146; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s4, 16
6147; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s2, 16
6148; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s0, 16
6149; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
6151; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6152; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
6153; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
6154; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
6155; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
6156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
6157; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6158; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6159; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
6160; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
6161; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6162; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6163; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
6164; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6165; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6166; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6167; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
6168; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
6169; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6170; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
6172; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
6173; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6174; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6175; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
6176; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
6177; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6178; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6179; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s37
6180; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
6181; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6182; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6183; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
6184; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
6185; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6186; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6187; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s35
6188; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
6189; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6190; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6191; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s34
6192; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6193; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6194; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6195; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s33
6196; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
6197; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6198; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6199; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s31
6200; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6201; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6202; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6203; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
6204; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
6205; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6206; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6207; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s29
6208; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6209; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6210; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6211; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
6212; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
6213; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6214; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6215; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s27
6216; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s38
6217; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6218; GCN-NOHSA-SI-NEXT:    s_endpgm
6219;
6220; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64:
6221; GCN-HSA:       ; %bb.0:
6222; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
6223; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
6224; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
6225; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6226; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6227; GCN-HSA-NEXT:    s_mov_b32 s18, 0xffff
6228; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6229; GCN-HSA-NEXT:    s_and_b32 s19, s0, s18
6230; GCN-HSA-NEXT:    s_and_b32 s20, s2, s18
6231; GCN-HSA-NEXT:    s_and_b32 s21, s4, s18
6232; GCN-HSA-NEXT:    s_and_b32 s22, s6, s18
6233; GCN-HSA-NEXT:    s_and_b32 s23, s8, s18
6234; GCN-HSA-NEXT:    s_and_b32 s24, s10, s18
6235; GCN-HSA-NEXT:    s_and_b32 s25, s12, s18
6236; GCN-HSA-NEXT:    s_and_b32 s26, s14, s18
6237; GCN-HSA-NEXT:    s_and_b32 s27, s1, s18
6238; GCN-HSA-NEXT:    s_and_b32 s28, s3, s18
6239; GCN-HSA-NEXT:    s_and_b32 s29, s5, s18
6240; GCN-HSA-NEXT:    s_and_b32 s30, s7, s18
6241; GCN-HSA-NEXT:    s_and_b32 s31, s9, s18
6242; GCN-HSA-NEXT:    s_and_b32 s33, s11, s18
6243; GCN-HSA-NEXT:    s_and_b32 s34, s13, s18
6244; GCN-HSA-NEXT:    s_and_b32 s18, s15, s18
6245; GCN-HSA-NEXT:    s_lshr_b32 s35, s1, 16
6246; GCN-HSA-NEXT:    s_lshr_b32 s3, s3, 16
6247; GCN-HSA-NEXT:    s_lshr_b32 s5, s5, 16
6248; GCN-HSA-NEXT:    s_lshr_b32 s7, s7, 16
6249; GCN-HSA-NEXT:    s_lshr_b32 s9, s9, 16
6250; GCN-HSA-NEXT:    s_lshr_b32 s11, s11, 16
6251; GCN-HSA-NEXT:    s_lshr_b32 s13, s13, 16
6252; GCN-HSA-NEXT:    s_lshr_b32 s15, s15, 16
6253; GCN-HSA-NEXT:    s_lshr_b32 s14, s14, 16
6254; GCN-HSA-NEXT:    s_lshr_b32 s12, s12, 16
6255; GCN-HSA-NEXT:    s_lshr_b32 s10, s10, 16
6256; GCN-HSA-NEXT:    s_lshr_b32 s8, s8, 16
6257; GCN-HSA-NEXT:    s_lshr_b32 s6, s6, 16
6258; GCN-HSA-NEXT:    s_lshr_b32 s4, s4, 16
6259; GCN-HSA-NEXT:    s_lshr_b32 s2, s2, 16
6260; GCN-HSA-NEXT:    s_lshr_b32 s36, s0, 16
6261; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xf0
6262; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6263; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6264; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6265; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xd0
6266; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6267; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s1
6268; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s0
6269; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xb0
6270; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6271; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
6272; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
6273; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x90
6274; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6275; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
6276; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
6277; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
6278; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
6279; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
6280; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6281; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6282; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
6283; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
6284; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
6285; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6286; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s33
6287; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
6288; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
6289; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6290; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s31
6291; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
6292; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
6293; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
6294; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6295; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
6296; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
6297; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6298; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6299; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6300; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
6301; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s29
6302; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
6303; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6304; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6305; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6306; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6307; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
6308; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
6309; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
6310; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6311; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6312; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6313; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6314; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xe0
6315; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s27
6316; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s35
6317; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6318; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6319; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6320; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6321; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xc0
6322; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
6323; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
6324; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6325; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6326; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6327; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6328; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xa0
6329; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s25
6330; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
6331; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6332; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6333; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6334; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6335; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x80
6336; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
6337; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6338; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6339; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6340; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6341; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6342; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
6343; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
6344; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6345; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6346; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6347; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6348; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6349; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
6350; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
6351; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6352; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6353; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6354; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6355; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6356; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
6357; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s21
6358; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
6359; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
6360; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6361; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6362; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
6363; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6364; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6365; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6366; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
6367; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s19
6368; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s36
6369; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
6370; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6371; GCN-HSA-NEXT:    s_endpgm
6372;
6373; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64:
6374; GCN-NOHSA-VI:       ; %bb.0:
6375; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[20:23], s[0:1], 0x24
6376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
6377; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
6378; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
6379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
6380; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6381; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[22:23], 0x0
6382; GCN-NOHSA-VI-NEXT:    s_mov_b32 s22, 0xffff
6383; GCN-NOHSA-VI-NEXT:    s_mov_b32 s16, s20
6384; GCN-NOHSA-VI-NEXT:    s_mov_b32 s17, s21
6385; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6386; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s0, s22
6387; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s1, s22
6388; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s2, s22
6389; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s3, s22
6390; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s4, s22
6391; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s5, s22
6392; GCN-NOHSA-VI-NEXT:    s_and_b32 s27, s6, s22
6393; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s7, s22
6394; GCN-NOHSA-VI-NEXT:    s_and_b32 s29, s8, s22
6395; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s9, s22
6396; GCN-NOHSA-VI-NEXT:    s_and_b32 s31, s10, s22
6397; GCN-NOHSA-VI-NEXT:    s_and_b32 s33, s11, s22
6398; GCN-NOHSA-VI-NEXT:    s_and_b32 s34, s12, s22
6399; GCN-NOHSA-VI-NEXT:    s_and_b32 s35, s13, s22
6400; GCN-NOHSA-VI-NEXT:    s_and_b32 s36, s14, s22
6401; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s15, s22
6402; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s15, 16
6403; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s14, 16
6404; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
6405; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
6406; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s13, 16
6407; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:240
6408; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s12, 16
6409; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
6410; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
6411; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:224
6412; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
6413; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s35
6414; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
6415; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:208
6416; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
6417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s34
6418; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
6419; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:192
6420; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
6421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s33
6422; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
6423; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:176
6424; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
6425; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s31
6426; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
6427; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:160
6428; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
6429; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
6430; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
6431; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:144
6432; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
6433; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s29
6434; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
6435; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:128
6436; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
6437; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
6438; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
6439; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
6440; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
6441; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s27
6442; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
6443; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
6444; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s3, s3, 16
6445; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
6446; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
6447; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
6448; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
6449; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s25
6450; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
6451; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
6452; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s1, 16
6453; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
6454; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
6455; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
6456; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s0, 16
6457; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s23
6458; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
6459; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
6460; GCN-NOHSA-VI-NEXT:    s_nop 0
6461; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s21
6462; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
6463; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
6464; GCN-NOHSA-VI-NEXT:    s_nop 0
6465; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
6466; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
6467; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
6468; GCN-NOHSA-VI-NEXT:    s_endpgm
6469;
6470; EG-LABEL: constant_zextload_v32i16_to_v32i64:
6471; EG:       ; %bb.0:
6472; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
6473; EG-NEXT:    TEX 2 @22
6474; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
6475; EG-NEXT:    TEX 0 @28
6476; EG-NEXT:    ALU 92, @65, KC0[CB0:0-32], KC1[]
6477; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
6478; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
6479; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
6480; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
6481; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
6482; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
6483; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
6484; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
6485; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
6486; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
6487; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
6488; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
6489; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
6490; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
6491; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
6492; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
6493; EG-NEXT:    CF_END
6494; EG-NEXT:    Fetch clause starting at 22:
6495; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
6496; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
6497; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
6498; EG-NEXT:    Fetch clause starting at 28:
6499; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
6500; EG-NEXT:    ALU clause starting at 30:
6501; EG-NEXT:     MOV * T19.X, KC0[2].Z,
6502; EG-NEXT:    ALU clause starting at 31:
6503; EG-NEXT:     LSHR * T23.Z, T20.W, literal.x,
6504; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6505; EG-NEXT:     AND_INT T23.X, T20.W, literal.x,
6506; EG-NEXT:     MOV T23.Y, 0.0,
6507; EG-NEXT:     LSHR T24.Z, T20.Z, literal.y,
6508; EG-NEXT:     AND_INT * T24.X, T20.Z, literal.x,
6509; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6510; EG-NEXT:     MOV T24.Y, 0.0,
6511; EG-NEXT:     LSHR * T25.Z, T20.Y, literal.x,
6512; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6513; EG-NEXT:     AND_INT T25.X, T20.Y, literal.x,
6514; EG-NEXT:     MOV T25.Y, 0.0,
6515; EG-NEXT:     LSHR T20.Z, T20.X, literal.y,
6516; EG-NEXT:     AND_INT * T20.X, T20.X, literal.x,
6517; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6518; EG-NEXT:     MOV T20.Y, 0.0,
6519; EG-NEXT:     LSHR * T26.Z, T22.W, literal.x,
6520; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6521; EG-NEXT:     AND_INT T26.X, T22.W, literal.x,
6522; EG-NEXT:     MOV T26.Y, 0.0,
6523; EG-NEXT:     LSHR T27.Z, T22.Z, literal.y,
6524; EG-NEXT:     AND_INT * T27.X, T22.Z, literal.x,
6525; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6526; EG-NEXT:     MOV T27.Y, 0.0,
6527; EG-NEXT:     LSHR * T28.Z, T22.Y, literal.x,
6528; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6529; EG-NEXT:     AND_INT T28.X, T22.Y, literal.x,
6530; EG-NEXT:     MOV T28.Y, 0.0,
6531; EG-NEXT:     LSHR T22.Z, T22.X, literal.y,
6532; EG-NEXT:     AND_INT * T22.X, T22.X, literal.x,
6533; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6534; EG-NEXT:     MOV T22.Y, 0.0,
6535; EG-NEXT:     LSHR * T19.Z, T21.W, literal.x,
6536; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6537; EG-NEXT:    ALU clause starting at 65:
6538; EG-NEXT:     AND_INT T19.X, T21.W, literal.x,
6539; EG-NEXT:     MOV T19.Y, 0.0,
6540; EG-NEXT:     LSHR T30.Z, T21.Z, literal.y,
6541; EG-NEXT:     AND_INT * T30.X, T21.Z, literal.x,
6542; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6543; EG-NEXT:     MOV T30.Y, 0.0,
6544; EG-NEXT:     LSHR * T31.Z, T21.Y, literal.x,
6545; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6546; EG-NEXT:     AND_INT T31.X, T21.Y, literal.x,
6547; EG-NEXT:     MOV T31.Y, 0.0,
6548; EG-NEXT:     LSHR T21.Z, T21.X, literal.y,
6549; EG-NEXT:     AND_INT * T21.X, T21.X, literal.x,
6550; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6551; EG-NEXT:     MOV T21.Y, 0.0,
6552; EG-NEXT:     LSHR * T32.Z, T29.W, literal.x,
6553; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6554; EG-NEXT:     AND_INT T32.X, T29.W, literal.x,
6555; EG-NEXT:     MOV T32.Y, 0.0,
6556; EG-NEXT:     LSHR T33.Z, T29.Z, literal.y,
6557; EG-NEXT:     AND_INT * T33.X, T29.Z, literal.x,
6558; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6559; EG-NEXT:     MOV T33.Y, 0.0,
6560; EG-NEXT:     LSHR * T34.Z, T29.Y, literal.x,
6561; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6562; EG-NEXT:     AND_INT T34.X, T29.Y, literal.x,
6563; EG-NEXT:     MOV T34.Y, 0.0,
6564; EG-NEXT:     LSHR T29.Z, T29.X, literal.y,
6565; EG-NEXT:     AND_INT * T29.X, T29.X, literal.x,
6566; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6567; EG-NEXT:     MOV T29.Y, 0.0,
6568; EG-NEXT:     MOV T23.W, 0.0,
6569; EG-NEXT:     MOV * T24.W, 0.0,
6570; EG-NEXT:     MOV T25.W, 0.0,
6571; EG-NEXT:     MOV * T20.W, 0.0,
6572; EG-NEXT:     MOV T26.W, 0.0,
6573; EG-NEXT:     MOV * T27.W, 0.0,
6574; EG-NEXT:     MOV T28.W, 0.0,
6575; EG-NEXT:     MOV * T22.W, 0.0,
6576; EG-NEXT:     MOV T19.W, 0.0,
6577; EG-NEXT:     MOV * T30.W, 0.0,
6578; EG-NEXT:     MOV T31.W, 0.0,
6579; EG-NEXT:     MOV * T21.W, 0.0,
6580; EG-NEXT:     MOV T32.W, 0.0,
6581; EG-NEXT:     MOV * T33.W, 0.0,
6582; EG-NEXT:     MOV T34.W, 0.0,
6583; EG-NEXT:     MOV * T29.W, 0.0,
6584; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
6585; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6586; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6587; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
6588; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6589; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6590; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
6591; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6592; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6593; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
6594; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6595; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6596; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
6597; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6598; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6599; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
6600; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6601; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6602; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
6603; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6604; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6605; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
6606; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6607; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
6608; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
6609; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6610; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
6611; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
6612; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6613; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
6614; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
6615; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6616; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
6617; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
6618; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6619; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
6620; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
6621; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6622; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
6623; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
6624; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6625; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
6626; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
6627; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6628; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
6629; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
6630; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6631  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
6632  %ext = zext <32 x i16> %load to <32 x i64>
6633  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
6634  ret void
6635}
6636
6637define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6638; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
6639; GCN-NOHSA-SI:       ; %bb.0:
6640; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
6641; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6642; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6643; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6644; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s15
6645; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, s13
6646; GCN-NOHSA-SI-NEXT:    s_mov_b32 s36, s11
6647; GCN-NOHSA-SI-NEXT:    s_mov_b32 s40, s9
6648; GCN-NOHSA-SI-NEXT:    s_mov_b32 s44, s7
6649; GCN-NOHSA-SI-NEXT:    s_mov_b32 s46, s5
6650; GCN-NOHSA-SI-NEXT:    s_mov_b32 s38, s3
6651; GCN-NOHSA-SI-NEXT:    s_mov_b32 s42, s1
6652; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s14, 16
6653; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s12, 16
6654; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s10, 16
6655; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s8, 16
6656; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[48:49], s[20:21], 0x100000
6657; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[50:51], s[18:19], 0x100000
6658; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s6, 16
6659; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s54, s4, 16
6660; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s2, 16
6661; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s0, 16
6662; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
6663; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
6664; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
6665; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
6666; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[60:61], s[8:9], 0x100000
6667; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[62:63], s[10:11], 0x100000
6668; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[64:65], s[12:13], 0x100000
6669; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[66:67], s[14:15], 0x100000
6670; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[68:69], s[0:1], 48
6671; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[70:71], s[2:3], 48
6672; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
6673; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
6674; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
6675; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[2:3], s[12:13], 48
6676; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[12:13], s[14:15], 48
6677; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
6678; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
6679; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
6680; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s50
6681; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s51
6682; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6683; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
6684; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s48
6685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s49
6686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s2
6687; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s3
6688; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6689; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6690; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[46:47], 0x100000
6691; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[44:45], 0x100000
6692; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[40:41], 0x100000
6693; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x100000
6694; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[40:41], s[42:43], 0x100000
6695; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x100000
6696; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s36
6697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s37
6698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s10
6699; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s11
6700; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s16
6701; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s17
6702; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s8
6703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s9
6704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s14
6705; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s15
6706; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s6
6707; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s7
6708; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s12
6709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s13
6710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s4
6711; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s5
6712; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6713; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[58:59], 0x100000
6714; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[56:57], 0x100000
6715; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[54:55], 0x100000
6716; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[52:53], 0x100000
6717; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[34:35], 0x100000
6718; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[28:29], 0x100000
6719; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[26:27], 0x100000
6720; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
6721; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
6722; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
6723; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
6724; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
6725; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
6726; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
6727; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
6728; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s39
6729; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s70
6730; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s71
6731; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6732; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6733; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
6734; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s41
6735; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s68
6736; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
6737; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6738; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6739; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s66
6740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s67
6741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s64
6742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s65
6743; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s62
6744; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s63
6745; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s60
6746; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s61
6747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s30
6748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s31
6749; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s22
6750; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s23
6751; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, s20
6752; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, s21
6753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
6755; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6756; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6757; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
6758; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
6759; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s16
6760; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s17
6761; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
6762; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s14
6763; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s15
6764; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
6765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s12
6766; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s13
6767; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
6768; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s10
6769; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s11
6770; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
6771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s8
6772; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s9
6773; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
6774; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, s6
6775; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, s7
6776; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
6777; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6778; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
6779; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6780; GCN-NOHSA-SI-NEXT:    s_endpgm
6781;
6782; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
6783; GCN-HSA:       ; %bb.0:
6784; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
6785; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6786; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6787; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6788; GCN-HSA-NEXT:    s_mov_b32 s42, s15
6789; GCN-HSA-NEXT:    s_mov_b32 s44, s13
6790; GCN-HSA-NEXT:    s_mov_b32 s46, s11
6791; GCN-HSA-NEXT:    s_mov_b32 s48, s9
6792; GCN-HSA-NEXT:    s_mov_b32 s50, s7
6793; GCN-HSA-NEXT:    s_mov_b32 s52, s5
6794; GCN-HSA-NEXT:    s_mov_b32 s54, s3
6795; GCN-HSA-NEXT:    s_mov_b32 s56, s1
6796; GCN-HSA-NEXT:    s_lshr_b32 s58, s14, 16
6797; GCN-HSA-NEXT:    s_lshr_b32 s60, s12, 16
6798; GCN-HSA-NEXT:    s_lshr_b32 s62, s10, 16
6799; GCN-HSA-NEXT:    s_lshr_b32 s64, s8, 16
6800; GCN-HSA-NEXT:    s_lshr_b32 s66, s6, 16
6801; GCN-HSA-NEXT:    s_lshr_b32 s68, s4, 16
6802; GCN-HSA-NEXT:    s_lshr_b32 s70, s2, 16
6803; GCN-HSA-NEXT:    s_lshr_b32 s72, s0, 16
6804; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
6805; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
6806; GCN-HSA-NEXT:    s_ashr_i64 s[36:37], s[0:1], 48
6807; GCN-HSA-NEXT:    s_ashr_i64 s[38:39], s[2:3], 48
6808; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[14:15], 48
6809; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[42:43], 0x100000
6810; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
6811; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
6812; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
6813; GCN-HSA-NEXT:    s_bfe_i64 s[28:29], s[10:11], 0x100000
6814; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
6815; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
6816; GCN-HSA-NEXT:    s_ashr_i64 s[40:41], s[4:5], 48
6817; GCN-HSA-NEXT:    s_ashr_i64 s[74:75], s[6:7], 48
6818; GCN-HSA-NEXT:    s_ashr_i64 s[76:77], s[8:9], 48
6819; GCN-HSA-NEXT:    s_ashr_i64 s[78:79], s[10:11], 48
6820; GCN-HSA-NEXT:    s_ashr_i64 s[80:81], s[12:13], 48
6821; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6822; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6823; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
6824; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
6825; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[72:73], 0x100000
6826; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[70:71], 0x100000
6827; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[68:69], 0x100000
6828; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[66:67], 0x100000
6829; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[64:65], 0x100000
6830; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[62:63], 0x100000
6831; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[60:61], 0x100000
6832; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[58:59], 0x100000
6833; GCN-HSA-NEXT:    s_bfe_i64 s[42:43], s[56:57], 0x100000
6834; GCN-HSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
6835; GCN-HSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x100000
6836; GCN-HSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x100000
6837; GCN-HSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
6838; GCN-HSA-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x100000
6839; GCN-HSA-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
6840; GCN-HSA-NEXT:    s_add_u32 s56, s16, 0xf0
6841; GCN-HSA-NEXT:    s_addc_u32 s57, s17, 0
6842; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s44
6843; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0xd0
6844; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s45
6845; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6846; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s44
6847; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s45
6848; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0xb0
6849; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6850; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s44
6851; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s45
6852; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0x90
6853; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6854; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s44
6855; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s56
6856; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s45
6857; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0x70
6858; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
6859; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6860; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
6861; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s40
6862; GCN-HSA-NEXT:    s_add_u32 s40, s16, 0x50
6863; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s41
6864; GCN-HSA-NEXT:    s_addc_u32 s41, s17, 0
6865; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s80
6866; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s81
6867; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s38
6868; GCN-HSA-NEXT:    s_add_u32 s38, s16, 48
6869; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s39
6870; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
6871; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
6872; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s38
6873; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s39
6874; GCN-HSA-NEXT:    s_add_u32 s38, s16, 16
6875; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s46
6876; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s47
6877; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s78
6878; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s79
6879; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
6880; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
6881; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s48
6882; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s14
6883; GCN-HSA-NEXT:    s_add_u32 s14, s16, 0xe0
6884; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s49
6885; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s76
6886; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s77
6887; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s15
6888; GCN-HSA-NEXT:    s_addc_u32 s15, s17, 0
6889; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s44
6890; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
6891; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s50
6892; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s12
6893; GCN-HSA-NEXT:    s_add_u32 s12, s16, 0xc0
6894; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s51
6895; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s74
6896; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s75
6897; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s45
6898; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s40
6899; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s13
6900; GCN-HSA-NEXT:    s_addc_u32 s13, s17, 0
6901; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s52
6902; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s53
6903; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s54
6904; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s55
6905; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s41
6906; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s38
6907; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
6908; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s42
6909; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s15
6910; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s13
6911; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s43
6912; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s39
6913; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s36
6914; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s37
6915; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s34
6916; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s35
6917; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s30
6918; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s31
6919; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s14
6920; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
6921; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s12
6922; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
6923; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
6924; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6925; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
6926; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6927; GCN-HSA-NEXT:    s_add_u32 s10, s16, 0xa0
6928; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
6929; GCN-HSA-NEXT:    s_addc_u32 s11, s17, 0
6930; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
6931; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
6932; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
6933; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
6934; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6935; GCN-HSA-NEXT:    s_nop 0
6936; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6937; GCN-HSA-NEXT:    s_add_u32 s8, s16, 0x80
6938; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
6939; GCN-HSA-NEXT:    s_addc_u32 s9, s17, 0
6940; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
6941; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
6942; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
6943; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
6944; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6945; GCN-HSA-NEXT:    s_nop 0
6946; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6947; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x60
6948; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
6949; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
6950; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
6951; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
6952; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
6953; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
6954; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6955; GCN-HSA-NEXT:    s_nop 0
6956; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
6957; GCN-HSA-NEXT:    s_add_u32 s4, s16, 64
6958; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
6959; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
6960; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6961; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
6962; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
6963; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6964; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6965; GCN-HSA-NEXT:    s_nop 0
6966; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6967; GCN-HSA-NEXT:    s_add_u32 s2, s16, 32
6968; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
6969; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
6970; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6971; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
6972; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
6973; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6974; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6975; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
6976; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
6977; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
6978; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
6979; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
6980; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
6981; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6982; GCN-HSA-NEXT:    s_endpgm
6983;
6984; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
6985; GCN-NOHSA-VI:       ; %bb.0:
6986; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
6987; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6988; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6989; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6990; GCN-NOHSA-VI-NEXT:    s_mov_b32 s30, s1
6991; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s1, 16
6992; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s5, 16
6993; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s48, s6, 16
6994; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s10, 16
6995; GCN-NOHSA-VI-NEXT:    s_mov_b32 s64, s11
6996; GCN-NOHSA-VI-NEXT:    s_mov_b32 s80, s15
6997; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s82, s15, 16
6998; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s0, 16
6999; GCN-NOHSA-VI-NEXT:    s_mov_b32 s44, s5
7000; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
7001; GCN-NOHSA-VI-NEXT:    s_mov_b32 s50, s7
7002; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s7, 16
7003; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
7004; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s8, 16
7005; GCN-NOHSA-VI-NEXT:    s_mov_b32 s56, s9
7006; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s9, 16
7007; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s78, s14, 16
7008; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[30:31], 0x100000
7009; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[34:35], 0x100000
7010; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[46:47], 0x100000
7011; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[48:49], 0x100000
7012; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[46:47], s[62:63], 0x100000
7013; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[48:49], s[64:65], 0x100000
7014; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[62:63], s[80:81], 0x100000
7015; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[64:65], s[82:83], 0x100000
7016; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
7017; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
7018; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s2, 16
7019; GCN-NOHSA-VI-NEXT:    s_mov_b32 s38, s3
7020; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s40, s3, 16
7021; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
7022; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s42, s4, 16
7023; GCN-NOHSA-VI-NEXT:    s_mov_b32 s72, s13
7024; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s74, s13, 16
7025; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[76:77], s[14:15], 0x100000
7026; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
7027; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
7028; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s16
7029; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s17
7030; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[28:29], 0x100000
7031; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[44:45], 0x100000
7032; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[44:45], s[58:59], 0x100000
7033; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[58:59], s[78:79], 0x100000
7034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s62
7035; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s63
7036; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s64
7037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
7038; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[68:69], s[12:13], 0x100000
7039; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s70, s12, 16
7040; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[40:41], 0x100000
7041; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[42:43], 0x100000
7042; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[40:41], s[54:55], 0x100000
7043; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[42:43], s[56:57], 0x100000
7044; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[54:55], s[72:73], 0x100000
7045; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[56:57], s[74:75], 0x100000
7046; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7047; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s66, s11, 16
7048; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s76
7049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s77
7050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s58
7051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
7052; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[38:39], 0x100000
7053; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[38:39], s[52:53], 0x100000
7054; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[52:53], s[70:71], 0x100000
7055; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
7056; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[60:61], s[10:11], 0x100000
7057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
7058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s55
7059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s56
7060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
7061; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[36:37], 0x100000
7062; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[50:51], 0x100000
7063; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[50:51], s[66:67], 0x100000
7064; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
7065; GCN-NOHSA-VI-NEXT:    s_nop 0
7066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s68
7067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s69
7068; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s52
7069; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
7070; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
7071; GCN-NOHSA-VI-NEXT:    s_nop 0
7072; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
7073; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
7074; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s50
7075; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
7076; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
7077; GCN-NOHSA-VI-NEXT:    s_nop 0
7078; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s60
7079; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s61
7080; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s46
7081; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
7082; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
7083; GCN-NOHSA-VI-NEXT:    s_nop 0
7084; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s42
7085; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s43
7086; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s44
7087; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
7088; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
7089; GCN-NOHSA-VI-NEXT:    s_nop 0
7090; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
7091; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
7092; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s40
7093; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
7094; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
7095; GCN-NOHSA-VI-NEXT:    s_nop 0
7096; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
7097; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
7098; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s38
7099; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
7100; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
7101; GCN-NOHSA-VI-NEXT:    s_nop 0
7102; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
7103; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
7104; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
7105; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
7106; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7107; GCN-NOHSA-VI-NEXT:    s_nop 0
7108; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
7109; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
7110; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
7111; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s31
7112; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
7113; GCN-NOHSA-VI-NEXT:    s_nop 0
7114; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
7115; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
7116; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
7117; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
7118; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
7119; GCN-NOHSA-VI-NEXT:    s_nop 0
7120; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
7121; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
7122; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
7123; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
7124; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7125; GCN-NOHSA-VI-NEXT:    s_nop 0
7126; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
7127; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
7128; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
7129; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
7130; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7131; GCN-NOHSA-VI-NEXT:    s_nop 0
7132; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
7133; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
7134; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
7135; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
7136; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7137; GCN-NOHSA-VI-NEXT:    s_nop 0
7138; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
7139; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
7140; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
7141; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
7142; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7143; GCN-NOHSA-VI-NEXT:    s_endpgm
7144;
7145; EG-LABEL: constant_sextload_v32i16_to_v32i64:
7146; EG:       ; %bb.0:
7147; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7148; EG-NEXT:    TEX 0 @22
7149; EG-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
7150; EG-NEXT:    TEX 2 @24
7151; EG-NEXT:    ALU 74, @87, KC0[CB0:0-32], KC1[]
7152; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
7153; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
7154; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0
7155; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0
7156; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
7157; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
7158; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
7159; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0
7160; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
7161; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
7162; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
7163; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0
7164; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
7165; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
7166; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
7167; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
7168; EG-NEXT:    CF_END
7169; EG-NEXT:    Fetch clause starting at 22:
7170; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
7171; EG-NEXT:    Fetch clause starting at 24:
7172; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
7173; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
7174; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
7175; EG-NEXT:    ALU clause starting at 30:
7176; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7177; EG-NEXT:    ALU clause starting at 31:
7178; EG-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
7179; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7180; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7181; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
7182; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7183; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7184; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
7185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7186; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7187; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
7188; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7189; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7190; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
7191; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7192; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7193; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
7194; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7195; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7196; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
7197; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7198; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7199; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
7200; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7201; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7202; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
7203; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7204; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7205; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
7206; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7207; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7208; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
7209; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7210; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7211; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
7212; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7213; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7214; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
7215; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7216; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7217; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
7218; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7219; EG-NEXT:     ASHR * T35.W, T20.X, literal.z,
7220; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7221; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7222; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
7223; EG-NEXT:     ASHR T35.Z, T20.X, literal.y,
7224; EG-NEXT:     ASHR * T37.W, T20.Y, literal.z,
7225; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7226; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7227; EG-NEXT:     BFE_INT T35.X, T20.X, 0.0, literal.x,
7228; EG-NEXT:     ASHR * T37.Z, T20.Y, literal.x,
7229; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7230; EG-NEXT:     BFE_INT T37.X, T20.Y, 0.0, literal.x,
7231; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
7232; EG-NEXT:     ASHR * T19.W, T20.Z, literal.y,
7233; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7234; EG-NEXT:    ALU clause starting at 87:
7235; EG-NEXT:     ASHR T19.Z, T20.Z, literal.x,
7236; EG-NEXT:     ASHR * T41.W, T20.W, literal.y,
7237; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7238; EG-NEXT:     BFE_INT T19.X, T20.Z, 0.0, literal.x,
7239; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
7240; EG-NEXT:     ASHR T41.Z, T20.W, literal.x,
7241; EG-NEXT:     ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212
7242; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7243; EG-NEXT:     BFE_INT T41.X, T20.W, 0.0, literal.x,
7244; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7245; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
7246; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
7247; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7248; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
7249; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
7250; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
7251; EG-NEXT:     ASHR * T43.W, T40.Z, literal.y,
7252; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7253; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
7254; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
7255; EG-NEXT:     ASHR T43.Z, T40.Z, literal.x,
7256; EG-NEXT:     ASHR * T44.W, T40.W, literal.y,
7257; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7258; EG-NEXT:     BFE_INT T43.X, T40.Z, 0.0, literal.x,
7259; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
7260; EG-NEXT:     ASHR T44.Z, T40.W, literal.x,
7261; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
7262; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7263; EG-NEXT:     BFE_INT T44.X, T40.W, 0.0, literal.x,
7264; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
7265; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
7266; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
7267; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7268; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
7269; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
7270; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
7271; EG-NEXT:     ASHR * T46.W, T39.Z, literal.y,
7272; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7273; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
7274; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
7275; EG-NEXT:     ASHR T46.Z, T39.Z, literal.x,
7276; EG-NEXT:     ASHR * T47.W, T39.W, literal.y,
7277; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7278; EG-NEXT:     BFE_INT T46.X, T39.Z, 0.0, literal.x,
7279; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
7280; EG-NEXT:     ASHR T47.Z, T39.W, literal.x,
7281; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
7282; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7283; EG-NEXT:     BFE_INT T47.X, T39.W, 0.0, literal.x,
7284; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
7285; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
7286; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
7287; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7288; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
7289; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
7290; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
7291; EG-NEXT:     ASHR * T49.W, T38.Z, literal.y,
7292; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7293; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
7294; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
7295; EG-NEXT:     ASHR T49.Z, T38.Z, literal.x,
7296; EG-NEXT:     ASHR * T50.W, T38.W, literal.y,
7297; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7298; EG-NEXT:     BFE_INT T49.X, T38.Z, 0.0, literal.x,
7299; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
7300; EG-NEXT:     ASHR * T50.Z, T38.W, literal.x,
7301; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7302; EG-NEXT:     BFE_INT T50.X, T38.W, 0.0, literal.x,
7303; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
7304; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7305; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7306; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
7307; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7308; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
7309; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7310  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
7311  %ext = sext <32 x i16> %load to <32 x i64>
7312  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
7313  ret void
7314}
7315
7316; FIXME: The two v64i16 -> v64i64 extload tests below are commented out because they trigger "undefined register" machine verifier errors.
7317
7318; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7319;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7320;   %ext = zext <64 x i16> %load to <64 x i64>
7321;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7322;   ret void
7323; }
7324
7325; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7326;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7327;   %ext = sext <64 x i16> %load to <64 x i64>
7328;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7329;   ret void
7330; }
7331
; Attribute group #0 (nounwind). Within this part of the file it is referenced
; only by the two disabled v64i16 -> v64i64 kernels above.
7332attributes #0 = { nounwind }
7333