1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
4; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
5; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
6
; Scalar case: load one i16 through the addrspace(4) pointer %in and store it
; unchanged through the addrspace(1) pointer %out.
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and pin the llc output for each RUN configuration. The GCN sequences expect
; a 16-bit unsigned load (buffer_load_ushort / flat_load_ushort) paired with a
; 16-bit store; the r600 sequence expects VTX_READ_16 and a MEM_RAT MSKOR store.
7define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
8; GCN-NOHSA-SI-LABEL: constant_load_i16:
9; GCN-NOHSA-SI:       ; %bb.0: ; %entry
10; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
11; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
12; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
13; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
14; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
15; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
16; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
17; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
18; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
19; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
20; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
21; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
22; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
23; GCN-NOHSA-SI-NEXT:    s_endpgm
24;
25; GCN-HSA-LABEL: constant_load_i16:
26; GCN-HSA:       ; %bb.0: ; %entry
27; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
28; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
29; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
30; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
31; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
32; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
33; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
34; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
35; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
36; GCN-HSA-NEXT:    s_endpgm
37;
38; GCN-NOHSA-VI-LABEL: constant_load_i16:
39; GCN-NOHSA-VI:       ; %bb.0: ; %entry
40; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
41; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
42; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
43; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
44; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
45; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
46; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
47; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
48; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
49; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
50; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
51; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
52; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
53; GCN-NOHSA-VI-NEXT:    s_endpgm
54;
55; EG-LABEL: constant_load_i16:
56; EG:       ; %bb.0: ; %entry
57; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
58; EG-NEXT:    TEX 0 @6
59; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
60; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
61; EG-NEXT:    CF_END
62; EG-NEXT:    PAD
63; EG-NEXT:    Fetch clause starting at 6:
64; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
65; EG-NEXT:    ALU clause starting at 8:
66; EG-NEXT:     MOV * T0.X, KC0[2].Z,
67; EG-NEXT:    ALU clause starting at 9:
68; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
69; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
70; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
71; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
72; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
73; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
74; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
75; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
76; EG-NEXT:     MOV T0.Y, 0.0,
77; EG-NEXT:     MOV * T0.Z, 0.0,
78; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
79; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
80entry:
81  %ld = load i16, i16 addrspace(4)* %in
82  store i16 %ld, i16 addrspace(1)* %out
83  ret void
84}
85
; <2 x i16> case: load the 32-bit vector through the addrspace(4) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect a single 32-bit access on every target:
; a scalar s_load_dword plus one dword store on GCN, and VTX_READ_32 plus one
; MEM_RAT_CACHELESS STORE_RAW on r600.
86define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
87; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
88; GCN-NOHSA-SI:       ; %bb.0: ; %entry
89; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
90; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
91; GCN-NOHSA-SI-NEXT:    s_load_dword s4, s[2:3], 0x0
92; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
93; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
94; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
95; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
96; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
97; GCN-NOHSA-SI-NEXT:    s_endpgm
98;
99; GCN-HSA-LABEL: constant_load_v2i16:
100; GCN-HSA:       ; %bb.0: ; %entry
101; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
102; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
103; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
104; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
105; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
106; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
107; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
108; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
109; GCN-HSA-NEXT:    s_endpgm
110;
111; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
112; GCN-NOHSA-VI:       ; %bb.0: ; %entry
113; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
114; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
115; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
116; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
117; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
118; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
119; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
120; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
121; GCN-NOHSA-VI-NEXT:    s_endpgm
122;
123; EG-LABEL: constant_load_v2i16:
124; EG:       ; %bb.0: ; %entry
125; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
126; EG-NEXT:    TEX 0 @6
127; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
128; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
129; EG-NEXT:    CF_END
130; EG-NEXT:    PAD
131; EG-NEXT:    Fetch clause starting at 6:
132; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
133; EG-NEXT:    ALU clause starting at 8:
134; EG-NEXT:     MOV * T0.X, KC0[2].Z,
135; EG-NEXT:    ALU clause starting at 9:
136; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
137; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
138entry:
139  %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
140  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
141  ret void
142}
143
; <3 x i16> case (odd element count): load the vector through the
; addrspace(4) pointer %in and store it through the addrspace(1) pointer %out.
; The autogenerated GCN assertions expect one s_load_dwordx2 and a split
; store: a 16-bit store at byte offset 4 plus a dword store at offset 0.
; The r600 assertions expect three VTX_READ_16 fetches, with the low two
; elements recombined by shift/or for a dword store and the third element
; written with MEM_RAT MSKOR.
144define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
145; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
146; GCN-NOHSA-SI:       ; %bb.0: ; %entry
147; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
148; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
149; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
150; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
151; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
152; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
154; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
155; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
157; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
158; GCN-NOHSA-SI-NEXT:    s_endpgm
159;
160; GCN-HSA-LABEL: constant_load_v3i16:
161; GCN-HSA:       ; %bb.0: ; %entry
162; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
163; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
164; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
165; GCN-HSA-NEXT:    s_add_u32 s4, s0, 4
166; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
167; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
168; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
169; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
170; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
171; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s3
172; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
173; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
174; GCN-HSA-NEXT:    flat_store_short v[2:3], v4
175; GCN-HSA-NEXT:    flat_store_dword v[0:1], v5
176; GCN-HSA-NEXT:    s_endpgm
177;
178; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
179; GCN-NOHSA-VI:       ; %bb.0: ; %entry
180; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
181; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
182; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
183; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
184; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
185; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
186; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
187; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s4
188; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
189; GCN-NOHSA-VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
190; GCN-NOHSA-VI-NEXT:    s_endpgm
191;
192; EG-LABEL: constant_load_v3i16:
193; EG:       ; %bb.0: ; %entry
194; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
195; EG-NEXT:    TEX 2 @6
196; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
197; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
198; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
199; EG-NEXT:    CF_END
200; EG-NEXT:    Fetch clause starting at 6:
201; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
202; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
203; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
204; EG-NEXT:    ALU clause starting at 12:
205; EG-NEXT:     MOV * T5.X, KC0[2].Z,
206; EG-NEXT:    ALU clause starting at 13:
207; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
208; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
209; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
210; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
211; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
212; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
213; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
214; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
215; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
216; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
217; EG-NEXT:     MOV T5.Y, 0.0,
218; EG-NEXT:     MOV * T5.Z, 0.0,
219; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
220; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
221; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
222; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
223; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
224; EG-NEXT:     OR_INT T6.X, PV.W, PS,
225; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
226; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
227entry:
228  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
229  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
230  ret void
231}
232
; <4 x i16> case: load the 64-bit vector through the addrspace(4) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect a single 64-bit access on every target:
; s_load_dwordx2 plus one dwordx2 store on GCN, and VTX_READ_64 plus one
; MEM_RAT_CACHELESS STORE_RAW of T0.XY on r600.
233define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
234; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
235; GCN-NOHSA-SI:       ; %bb.0: ; %entry
236; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
237; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
238; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
239; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
240; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
241; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
242; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
243; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
244; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
245; GCN-NOHSA-SI-NEXT:    s_endpgm
246;
247; GCN-HSA-LABEL: constant_load_v4i16:
248; GCN-HSA:       ; %bb.0: ; %entry
249; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
250; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
251; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
252; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
253; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
254; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
255; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
256; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
257; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
258; GCN-HSA-NEXT:    s_endpgm
259;
260; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
261; GCN-NOHSA-VI:       ; %bb.0: ; %entry
262; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
263; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
264; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
265; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
266; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
267; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
268; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
269; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
270; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
271; GCN-NOHSA-VI-NEXT:    s_endpgm
272;
273; EG-LABEL: constant_load_v4i16:
274; EG:       ; %bb.0: ; %entry
275; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
276; EG-NEXT:    TEX 0 @6
277; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
278; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
279; EG-NEXT:    CF_END
280; EG-NEXT:    PAD
281; EG-NEXT:    Fetch clause starting at 6:
282; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
283; EG-NEXT:    ALU clause starting at 8:
284; EG-NEXT:     MOV * T0.X, KC0[2].Z,
285; EG-NEXT:    ALU clause starting at 9:
286; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
287; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
288entry:
289  %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
290  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
291  ret void
292}
293
; <8 x i16> case: load the 128-bit vector through the addrspace(4) pointer
; %in and store it unchanged through the addrspace(1) pointer %out.
; The autogenerated assertions expect a single 128-bit access on every
; target: s_load_dwordx4 plus one dwordx4 store on GCN, and VTX_READ_128 plus
; one MEM_RAT_CACHELESS STORE_RAW of T0.XYZW on r600.
294define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
295; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
296; GCN-NOHSA-SI:       ; %bb.0: ; %entry
297; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
298; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
299; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
300; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
301; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
302; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
303; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
304; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
305; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
306; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
307; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
308; GCN-NOHSA-SI-NEXT:    s_endpgm
309;
310; GCN-HSA-LABEL: constant_load_v8i16:
311; GCN-HSA:       ; %bb.0: ; %entry
312; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
313; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
314; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
315; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
316; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
317; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
318; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
319; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
320; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
321; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
322; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
323; GCN-HSA-NEXT:    s_endpgm
324;
325; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
326; GCN-NOHSA-VI:       ; %bb.0: ; %entry
327; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
328; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
329; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
330; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
331; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
332; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
333; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
334; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
335; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
336; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
337; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
338; GCN-NOHSA-VI-NEXT:    s_endpgm
339;
340; EG-LABEL: constant_load_v8i16:
341; EG:       ; %bb.0: ; %entry
342; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
343; EG-NEXT:    TEX 0 @6
344; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
345; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
346; EG-NEXT:    CF_END
347; EG-NEXT:    PAD
348; EG-NEXT:    Fetch clause starting at 6:
349; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
350; EG-NEXT:    ALU clause starting at 8:
351; EG-NEXT:     MOV * T0.X, KC0[2].Z,
352; EG-NEXT:    ALU clause starting at 9:
353; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
354; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
355entry:
356  %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
357  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
358  ret void
359}
360
; <16 x i16> case: load the 256-bit vector through the addrspace(4) pointer
; %in (no explicit alignment on the load) and store it through the
; addrspace(1) pointer %out.
; The autogenerated GCN assertions expect one s_load_dwordx8 followed by two
; dwordx4 stores (byte offset 16, then offset 0). The r600 assertions expect
; two VTX_READ_128 fetches and two MEM_RAT_CACHELESS STORE_RAW stores.
361define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
362; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
363; GCN-NOHSA-SI:       ; %bb.0: ; %entry
364; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
365; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
366; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
367; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
368; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
369; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
370; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
371; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
372; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
373; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
374; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
375; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
376; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
377; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s1
378; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s2
379; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s3
380; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
381; GCN-NOHSA-SI-NEXT:    s_endpgm
382;
383; GCN-HSA-LABEL: constant_load_v16i16:
384; GCN-HSA:       ; %bb.0: ; %entry
385; GCN-HSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
386; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
387; GCN-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
388; GCN-HSA-NEXT:    s_add_u32 s10, s8, 16
389; GCN-HSA-NEXT:    s_addc_u32 s11, s9, 0
390; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s10
391; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s11
392; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
393; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
394; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
395; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
396; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
397; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
398; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
399; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
400; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
401; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
402; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
403; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
404; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
405; GCN-HSA-NEXT:    s_endpgm
406;
407; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
408; GCN-NOHSA-VI:       ; %bb.0: ; %entry
409; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
410; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
411; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
412; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
413; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
414; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
415; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
416; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
417; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
418; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
419; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
420; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
421; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
422; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
423; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
424; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0
425; GCN-NOHSA-VI-NEXT:    s_endpgm
426;
427; EG-LABEL: constant_load_v16i16:
428; EG:       ; %bb.0: ; %entry
429; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
430; EG-NEXT:    TEX 0 @8
431; EG-NEXT:    ALU 3, @13, KC0[CB0:0-32], KC1[]
432; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
433; EG-NEXT:    ALU 1, @17, KC0[CB0:0-32], KC1[]
434; EG-NEXT:    TEX 0 @10
435; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
436; EG-NEXT:    CF_END
437; EG-NEXT:    Fetch clause starting at 8:
438; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
439; EG-NEXT:    Fetch clause starting at 10:
440; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
441; EG-NEXT:    ALU clause starting at 12:
442; EG-NEXT:     MOV * T0.X, KC0[2].Z,
443; EG-NEXT:    ALU clause starting at 13:
444; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
445; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
446; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
447; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
448; EG-NEXT:    ALU clause starting at 17:
449; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
450; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
451entry:
452  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
453  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
454  ret void
455}
456
; Under-aligned <16 x i16> case: the load from the addrspace(4) pointer
; %ptr0 carries an explicit `align 2`, and the result is stored (align 32)
; to an undef addrspace(1) pointer, so the kernel takes only one argument.
; The autogenerated assertions differ by target: the SI and VI no-HSA
; sequences expect sixteen buffer_load_ushort loads whose halves are
; recombined with v_lshlrev_b32 / v_or_b32 before two dwordx4 stores, while
; the HSA sequence expects two flat_load_dwordx4 and the r600 sequence two
; VTX_READ_128 fetches.
; Attribute group #0 is defined outside the visible part of this file.
457define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
458; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
459; GCN-NOHSA-SI:       ; %bb.0: ; %entry
460; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
461; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
462; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
463; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
464; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
465; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
466; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
467; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
468; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
469; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
470; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
471; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
472; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
473; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
474; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
475; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
476; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
477; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
478; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
479; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
480; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
481; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
482; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
483; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
484; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
485; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
486; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
487; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
488; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
489; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
490; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
491; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
492; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
493; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
494; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
495; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
496; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
497; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
498; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
499; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
500; GCN-NOHSA-SI-NEXT:    s_endpgm
501;
502; GCN-HSA-LABEL: constant_load_v16i16_align2:
503; GCN-HSA:       ; %bb.0: ; %entry
504; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
505; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
506; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
507; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
508; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
509; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
510; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
511; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
512; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
513; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
514; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
515; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[0:3]
516; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
517; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
518; GCN-HSA-NEXT:    s_endpgm
519;
520; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
521; GCN-NOHSA-VI:       ; %bb.0: ; %entry
522; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
523; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
524; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
525; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
526; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:14
527; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:10
528; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
529; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:2
530; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:30
531; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:26
532; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:22
533; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:18
534; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:12
535; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:8
536; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:4
537; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0
538; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:28
539; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:24
540; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:20
541; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:16
542; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
543; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
544; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
545; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(13)
546; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v2
547; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
548; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v3
549; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(11)
550; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
551; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
552; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
553; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(9)
554; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v6
555; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
556; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v19, 16, v7
557; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(7)
558; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v8, v0
559; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
560; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v9, v1
561; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
562; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v10, v16
563; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
564; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v11, v17
565; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
566; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v12, v4
567; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
568; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v13, v5
569; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
570; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v14, v18
571; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
572; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v15, v19
573; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
574; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
575; GCN-NOHSA-VI-NEXT:    s_endpgm
576;
577; EG-LABEL: constant_load_v16i16_align2:
578; EG:       ; %bb.0: ; %entry
579; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
580; EG-NEXT:    TEX 0 @8
581; EG-NEXT:    ALU 1, @13, KC0[], KC1[]
582; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
583; EG-NEXT:    TEX 0 @10
584; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
585; EG-NEXT:    CF_END
586; EG-NEXT:    PAD
587; EG-NEXT:    Fetch clause starting at 8:
588; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
589; EG-NEXT:    Fetch clause starting at 10:
590; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
591; EG-NEXT:    ALU clause starting at 12:
592; EG-NEXT:     MOV * T0.X, KC0[2].Y,
593; EG-NEXT:    ALU clause starting at 13:
594; EG-NEXT:     MOV * T2.X, literal.x,
595; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
596entry:
597  %ld =  load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
598  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
599  ret void
600}
601
; Zero-extending load: load an i16 through the addrspace(4) pointer %in,
; zext it to i32 and store the i32 through the addrspace(1) pointer %out.
; The autogenerated assertions show no separate extend instruction: GCN
; expects an unsigned 16-bit load (buffer_load_ushort / flat_load_ushort)
; feeding a dword store directly, and r600 expects VTX_READ_16 feeding the
; store. This function has no named entry block, so the block comment in the
; checked output is just `%bb.0:`.
; Attribute group #0 is defined outside the visible part of this file.
602define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
603; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
604; GCN-NOHSA-SI:       ; %bb.0:
605; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
606; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
607; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
608; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
609; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
610; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
611; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
612; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
613; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
615; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
616; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
617; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
618; GCN-NOHSA-SI-NEXT:    s_endpgm
619;
620; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
621; GCN-HSA:       ; %bb.0:
622; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
623; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
624; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
625; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
626; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
627; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
628; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
629; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
630; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
631; GCN-HSA-NEXT:    s_endpgm
632;
633; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
634; GCN-NOHSA-VI:       ; %bb.0:
635; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
636; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
637; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
638; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
639; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
640; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
641; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
642; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
643; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
644; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
645; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
646; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
647; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
648; GCN-NOHSA-VI-NEXT:    s_endpgm
649;
650; EG-LABEL: constant_zextload_i16_to_i32:
651; EG:       ; %bb.0:
652; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
653; EG-NEXT:    TEX 0 @6
654; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
655; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
656; EG-NEXT:    CF_END
657; EG-NEXT:    PAD
658; EG-NEXT:    Fetch clause starting at 6:
659; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
660; EG-NEXT:    ALU clause starting at 8:
661; EG-NEXT:     MOV * T0.X, KC0[2].Z,
662; EG-NEXT:    ALU clause starting at 9:
663; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
664; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
665  %a = load i16, i16 addrspace(4)* %in
666  %ext = zext i16 %a to i32
667  store i32 %ext, i32 addrspace(1)* %out
668  ret void
669}
670
; Sign-extending load: load an i16 through the addrspace(4) pointer %in,
; sext it to i32 and store the i32 through the addrspace(1) pointer %out.
; The autogenerated GCN assertions expect a signed 16-bit load
; (buffer_load_sshort / flat_load_sshort) feeding a dword store with no
; separate extend instruction. The r600 assertions expect VTX_READ_16
; followed by a BFE_INT with a literal of 16 before the store.
; This function has no named entry block, so the block comment in the checked
; output is just `%bb.0:`.
; Attribute group #0 is defined outside the visible part of this file.
671define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
672; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
673; GCN-NOHSA-SI:       ; %bb.0:
674; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
675; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
676; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
677; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
678; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
679; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
680; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
681; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
682; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
683; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
684; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
685; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
686; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
687; GCN-NOHSA-SI-NEXT:    s_endpgm
688;
689; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
690; GCN-HSA:       ; %bb.0:
691; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
692; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
693; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
694; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
695; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
696; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
697; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
698; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
699; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
700; GCN-HSA-NEXT:    s_endpgm
701;
702; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
703; GCN-NOHSA-VI:       ; %bb.0:
704; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
705; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
706; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
707; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
708; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
709; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
710; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
711; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
712; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
713; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
714; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
715; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
716; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
717; GCN-NOHSA-VI-NEXT:    s_endpgm
718;
719; EG-LABEL: constant_sextload_i16_to_i32:
720; EG:       ; %bb.0:
721; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
722; EG-NEXT:    TEX 0 @6
723; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
724; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
725; EG-NEXT:    CF_END
726; EG-NEXT:    PAD
727; EG-NEXT:    Fetch clause starting at 6:
728; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
729; EG-NEXT:    ALU clause starting at 8:
730; EG-NEXT:     MOV * T0.X, KC0[2].Z,
731; EG-NEXT:    ALU clause starting at 9:
732; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
733; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
734; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
735  %a = load i16, i16 addrspace(4)* %in
736  %ext = sext i16 %a to i32
737  store i32 %ext, i32 addrspace(1)* %out
738  ret void
739}
740
; Loads a <1 x i16> through the addrspace(4) pointer %in, zero-extends it to
; <1 x i32> and stores the result through the addrspace(1) pointer %out.
; Expected code: buffer_load_ushort (SI/VI) or flat_load_ushort (HSA) feeding a
; dword store; on EG a VTX_READ_16 with no separate extension instruction.
741define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
742; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
743; GCN-NOHSA-SI:       ; %bb.0:
744; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
745; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
746; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
747; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
748; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
749; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
750; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
751; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
752; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
753; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
754; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
755; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
756; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
757; GCN-NOHSA-SI-NEXT:    s_endpgm
758;
759; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
760; GCN-HSA:       ; %bb.0:
761; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
762; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
763; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
764; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
765; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
766; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
767; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
768; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
769; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
770; GCN-HSA-NEXT:    s_endpgm
771;
772; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
773; GCN-NOHSA-VI:       ; %bb.0:
774; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
775; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
776; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
777; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
778; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
779; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
780; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
781; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
782; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
783; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
784; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
785; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
786; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
787; GCN-NOHSA-VI-NEXT:    s_endpgm
788;
789; EG-LABEL: constant_zextload_v1i16_to_v1i32:
790; EG:       ; %bb.0:
791; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
792; EG-NEXT:    TEX 0 @6
793; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
794; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
795; EG-NEXT:    CF_END
796; EG-NEXT:    PAD
797; EG-NEXT:    Fetch clause starting at 6:
798; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
799; EG-NEXT:    ALU clause starting at 8:
800; EG-NEXT:     MOV * T0.X, KC0[2].Z,
801; EG-NEXT:    ALU clause starting at 9:
802; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
803; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
804  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
805  %ext = zext <1 x i16> %load to <1 x i32>
806  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
807  ret void
808}
809
; Loads a <1 x i16> through the addrspace(4) pointer %in, sign-extends it to
; <1 x i32> and stores the result through the addrspace(1) pointer %out.
; Expected code: buffer_load_sshort (SI/VI) or flat_load_sshort (HSA) feeding a
; dword store; on EG a VTX_READ_16 followed by a 16-bit BFE_INT.
810define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
811; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
812; GCN-NOHSA-SI:       ; %bb.0:
813; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
814; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
815; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
816; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
817; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
818; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
819; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
820; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
821; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
822; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
823; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
824; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
825; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
826; GCN-NOHSA-SI-NEXT:    s_endpgm
827;
828; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
829; GCN-HSA:       ; %bb.0:
830; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
831; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
832; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
833; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
834; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
835; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
836; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
837; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
838; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
839; GCN-HSA-NEXT:    s_endpgm
840;
841; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32:
842; GCN-NOHSA-VI:       ; %bb.0:
843; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
844; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
845; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
846; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
847; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
848; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
849; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
850; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
851; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
852; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
853; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
854; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
855; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
856; GCN-NOHSA-VI-NEXT:    s_endpgm
857;
858; EG-LABEL: constant_sextload_v1i16_to_v1i32:
859; EG:       ; %bb.0:
860; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
861; EG-NEXT:    TEX 0 @6
862; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
863; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
864; EG-NEXT:    CF_END
865; EG-NEXT:    PAD
866; EG-NEXT:    Fetch clause starting at 6:
867; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
868; EG-NEXT:    ALU clause starting at 8:
869; EG-NEXT:     MOV * T0.X, KC0[2].Z,
870; EG-NEXT:    ALU clause starting at 9:
871; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
872; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
873; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
874  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
875  %ext = sext <1 x i16> %load to <1 x i32>
876  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
877  ret void
878}
879
; Loads a <2 x i16> through the addrspace(4) pointer %in, zero-extends each
; element to i32 and stores the <2 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dword, then s_lshr_b32 by 16 for the high
; element and s_and_b32 with 0xffff for the low element, stored as dwordx2.
; Expected EG code: VTX_READ_32 followed by LSHR by 16 and AND_INT with 65535.
880define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
881; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
882; GCN-NOHSA-SI:       ; %bb.0:
883; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
884; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
885; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
886; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
887; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
888; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
889; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
890; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
891; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
892; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
893; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
894; GCN-NOHSA-SI-NEXT:    s_endpgm
895;
896; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32:
897; GCN-HSA:       ; %bb.0:
898; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
899; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
900; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
901; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
902; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
903; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
904; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
905; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
906; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
907; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
908; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
909; GCN-HSA-NEXT:    s_endpgm
910;
911; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32:
912; GCN-NOHSA-VI:       ; %bb.0:
913; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
914; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
915; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
916; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
917; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
918; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
919; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s4, 16
920; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
921; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
922; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
923; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
924; GCN-NOHSA-VI-NEXT:    s_endpgm
925;
926; EG-LABEL: constant_zextload_v2i16_to_v2i32:
927; EG:       ; %bb.0:
928; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
929; EG-NEXT:    TEX 0 @6
930; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
931; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
932; EG-NEXT:    CF_END
933; EG-NEXT:    PAD
934; EG-NEXT:    Fetch clause starting at 6:
935; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
936; EG-NEXT:    ALU clause starting at 8:
937; EG-NEXT:     MOV * T4.X, KC0[2].Z,
938; EG-NEXT:    ALU clause starting at 9:
939; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
940; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
941; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
942; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
943; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
944  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
945  %ext = zext <2 x i16> %load to <2 x i32>
946  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
947  ret void
948}
949
950; TODO: We should use ASHR instead of LSHR + BFE
; Loads a <2 x i16> through the addrspace(4) pointer %in, sign-extends each
; element to i32 and stores the <2 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dword, then s_ashr_i32 by 16 for the high
; element and s_sext_i32_i16 for the low element, stored as dwordx2.
; Expected EG code: VTX_READ_32, BFE_INT for the low element and an
; LSHR + BFE_INT pair for the high element.
951define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
952; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
953; GCN-NOHSA-SI:       ; %bb.0:
954; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
955; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
956; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
957; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
958; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
959; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s4, s2, 16
960; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s2
961; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
962; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
963; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
964; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
965; GCN-NOHSA-SI-NEXT:    s_endpgm
966;
967; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32:
968; GCN-HSA:       ; %bb.0:
969; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
970; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
971; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
972; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
973; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
974; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
975; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
976; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s2
977; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
978; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
979; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
980; GCN-HSA-NEXT:    s_endpgm
981;
982; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32:
983; GCN-NOHSA-VI:       ; %bb.0:
984; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
985; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
986; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
987; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
988; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
989; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
990; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s5, s4, 16
991; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
993; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
994; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
995; GCN-NOHSA-VI-NEXT:    s_endpgm
996;
997; EG-LABEL: constant_sextload_v2i16_to_v2i32:
998; EG:       ; %bb.0:
999; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1000; EG-NEXT:    TEX 0 @6
1001; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1002; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1003; EG-NEXT:    CF_END
1004; EG-NEXT:    PAD
1005; EG-NEXT:    Fetch clause starting at 6:
1006; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1007; EG-NEXT:    ALU clause starting at 8:
1008; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1009; EG-NEXT:    ALU clause starting at 9:
1010; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1011; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1012; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1013; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1014; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1015; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1016  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
1017  %ext = sext <2 x i16> %load to <2 x i32>
1018  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1019  ret void
1020}
1021
; Loads a <3 x i16> through the addrspace(4) pointer %in, zero-extends each
; element to i32 and stores the <3 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dwordx2, then s_lshr_b32 / s_and_b32 to split
; the elements. SI writes the result as a dword store at offset 8 plus a
; dwordx2 store; HSA and VI use a single dwordx3 store.
; Expected EG code: three VTX_READ_16 fetches and two MEM_RAT_CACHELESS stores.
1022define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1023; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32:
1024; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1025; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1026; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1027; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1028; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1029; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1030; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1031; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s4, 16
1032; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1033; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1034; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1035; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1036; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1037; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1039; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1040; GCN-NOHSA-SI-NEXT:    s_endpgm
1041;
1042; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32:
1043; GCN-HSA:       ; %bb.0: ; %entry
1044; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1045; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1046; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1047; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1048; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1049; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1050; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
1051; GCN-HSA-NEXT:    s_and_b32 s1, s3, 0xffff
1052; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
1053; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1054; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1055; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1056; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1057; GCN-HSA-NEXT:    s_endpgm
1058;
1059; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32:
1060; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1061; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1062; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1063; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1064; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1065; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1066; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1067; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
1068; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s4, 16
1069; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
1070; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1071; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s6
1072; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1073; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1074; GCN-NOHSA-VI-NEXT:    s_endpgm
1075;
1076; EG-LABEL: constant_zextload_v3i16_to_v3i32:
1077; EG:       ; %bb.0: ; %entry
1078; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1079; EG-NEXT:    TEX 2 @6
1080; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1081; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1082; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1083; EG-NEXT:    CF_END
1084; EG-NEXT:    Fetch clause starting at 6:
1085; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1086; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1087; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1088; EG-NEXT:    ALU clause starting at 12:
1089; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1090; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1091; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1092; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1093; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1094; EG-NEXT:    ALU clause starting at 17:
1095; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1096; EG-NEXT:     MOV * T3.Y, T1.X,
1097; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1098entry:
1099  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1100  %ext = zext <3 x i16> %ld to <3 x i32>
1101  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1102  ret void
1103}
1104
; Loads a <3 x i16> through the addrspace(4) pointer %in, sign-extends each
; element to i32 and stores the <3 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dwordx2, then s_ashr_i32 by 16 and
; s_sext_i32_i16 to extend the elements. SI writes the result as a dword store
; at offset 8 plus a dwordx2 store; HSA and VI use a single dwordx3 store.
; Expected EG code: three VTX_READ_16 fetches, one BFE_INT per element and two
; MEM_RAT_CACHELESS stores.
1105define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
1106; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32:
1107; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1108; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1109; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1110; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1111; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1112; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1113; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1114; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s6, s4, 16
1115; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1116; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1117; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1118; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1119; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1120; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1121; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1122; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1123; GCN-NOHSA-SI-NEXT:    s_endpgm
1124;
1125; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32:
1126; GCN-HSA:       ; %bb.0: ; %entry
1127; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1128; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1129; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1130; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1131; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1132; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1133; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
1134; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1135; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1136; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1137; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1138; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1139; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1140; GCN-HSA-NEXT:    s_endpgm
1141;
1142; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32:
1143; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1144; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1145; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1146; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1147; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1148; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1149; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1150; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s4, 16
1151; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1152; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1153; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1154; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s6
1155; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1156; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1157; GCN-NOHSA-VI-NEXT:    s_endpgm
1158;
1159; EG-LABEL: constant_sextload_v3i16_to_v3i32:
1160; EG:       ; %bb.0: ; %entry
1161; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1162; EG-NEXT:    TEX 2 @6
1163; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1164; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1165; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1166; EG-NEXT:    CF_END
1167; EG-NEXT:    Fetch clause starting at 6:
1168; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1169; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1170; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1171; EG-NEXT:    ALU clause starting at 12:
1172; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1173; EG-NEXT:    ALU clause starting at 13:
1174; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1175; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1176; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1177; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1178; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1179; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1180; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1181; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1182; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1183; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1184entry:
1185  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
1186  %ext = sext <3 x i16> %ld to <3 x i32>
1187  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1188  ret void
1189}
1190
1191; v4i16 is naturally 8 byte aligned
1192; TODO: This should use LD, but for some reason there are redundant MOVs
; Loads a <4 x i16> through the addrspace(4) pointer %in, zero-extends each
; element to i32 and stores the <4 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dwordx2, then an s_lshr_b32 by 16 / s_and_b32
; with 0xffff pair per loaded dword, stored with a single dwordx4 store.
; Expected EG code: VTX_READ_64, register-to-register MOVs, then LSHR / AND_INT
; pairs and one MEM_RAT_CACHELESS store of T5.XYZW.
1193define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1194; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
1195; GCN-NOHSA-SI:       ; %bb.0:
1196; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1197; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1198; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1199; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1200; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1201; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
1202; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1203; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1204; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1205; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1206; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1207; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1208; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1209; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1210; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1211; GCN-NOHSA-SI-NEXT:    s_endpgm
1212;
1213; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32:
1214; GCN-HSA:       ; %bb.0:
1215; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1216; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1217; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1218; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1219; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1220; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1221; GCN-HSA-NEXT:    s_lshr_b32 s0, s3, 16
1222; GCN-HSA-NEXT:    s_lshr_b32 s1, s2, 16
1223; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
1224; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
1225; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1226; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1227; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
1228; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1229; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1230; GCN-HSA-NEXT:    s_endpgm
1231;
1232; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32:
1233; GCN-NOHSA-VI:       ; %bb.0:
1234; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1235; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1236; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1237; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1238; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1239; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1240; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s5, 16
1241; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
1242; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s4, 16
1243; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
1244; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1245; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1246; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1247; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s6
1248; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1249; GCN-NOHSA-VI-NEXT:    s_endpgm
1250;
1251; EG-LABEL: constant_zextload_v4i16_to_v4i32:
1252; EG:       ; %bb.0:
1253; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1254; EG-NEXT:    TEX 0 @6
1255; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
1256; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1257; EG-NEXT:    CF_END
1258; EG-NEXT:    PAD
1259; EG-NEXT:    Fetch clause starting at 6:
1260; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1261; EG-NEXT:    ALU clause starting at 8:
1262; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1263; EG-NEXT:    ALU clause starting at 9:
1264; EG-NEXT:     MOV T2.X, T5.X,
1265; EG-NEXT:     MOV * T3.X, T5.Y,
1266; EG-NEXT:     MOV T0.Y, PV.X,
1267; EG-NEXT:     MOV * T0.Z, PS,
1268; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1269; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1270; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1271; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1272; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1273; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1274; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
1275; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1276; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1277  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1278  %ext = zext <4 x i16> %load to <4 x i32>
1279  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1280  ret void
1281}
1282
1283; v4i16 is naturally 8 byte aligned
1284; TODO: This should use LD, but for some reason there are redundant MOVs
1285; TODO: We should use ASHR instead of LSHR + BFE
; Loads a <4 x i16> through the addrspace(4) pointer %in, sign-extends each
; element to i32 and stores the <4 x i32> through the addrspace(1) pointer %out.
; Expected GCN code: one s_load_dwordx2 and a single dwordx4 store. The low
; halves use s_sext_i32_i16; for the topmost element SI and HSA use
; s_ashr_i64 by 48 while VI uses s_ashr_i32 by 16.
; Expected EG code: VTX_READ_64, register-to-register MOVs, BFE_INT for the low
; halves and LSHR + BFE_INT for the high halves.
1286define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
1287; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
1288; GCN-NOHSA-SI:       ; %bb.0:
1289; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1290; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1291; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1292; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1293; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1294; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s4, 16
1295; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[4:5], 48
1296; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1297; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1298; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1299; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1300; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
1301; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1302; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1303; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1304; GCN-NOHSA-SI-NEXT:    s_endpgm
1305;
1306; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32:
1307; GCN-HSA:       ; %bb.0:
1308; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1309; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1310; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1311; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1312; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1313; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1314; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[2:3], 48
1315; GCN-HSA-NEXT:    s_ashr_i32 s4, s2, 16
1316; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1317; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1318; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1319; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
1320; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1321; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1322; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1323; GCN-HSA-NEXT:    s_endpgm
1324;
1325; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32:
1326; GCN-NOHSA-VI:       ; %bb.0:
1327; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1328; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1329; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1330; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1331; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1332; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1333; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s6, s5, 16
1334; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s7, s4, 16
1335; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1336; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1337; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1338; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1339; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1340; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s6
1341; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1342; GCN-NOHSA-VI-NEXT:    s_endpgm
1343;
1344; EG-LABEL: constant_sextload_v4i16_to_v4i32:
1345; EG:       ; %bb.0:
1346; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1347; EG-NEXT:    TEX 0 @6
1348; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1349; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1350; EG-NEXT:    CF_END
1351; EG-NEXT:    PAD
1352; EG-NEXT:    Fetch clause starting at 6:
1353; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1354; EG-NEXT:    ALU clause starting at 8:
1355; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1356; EG-NEXT:    ALU clause starting at 9:
1357; EG-NEXT:     MOV T2.X, T5.X,
1358; EG-NEXT:     MOV * T3.X, T5.Y,
1359; EG-NEXT:     MOV T0.Y, PV.X,
1360; EG-NEXT:     MOV * T0.Z, PS,
1361; EG-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1362; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1363; EG-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1364; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1365; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1366; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1367; EG-NEXT:     LSHR * T0.W, T0.Y, literal.x,
1368; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1369; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1370; EG-NEXT:     BFE_INT * T5.Y, PS, 0.0, literal.y,
1371; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1372  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
1373  %ext = sext <4 x i16> %load to <4 x i32>
1374  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1375  ret void
1376}
1377
1378; v8i16 is naturally 16 byte aligned
1379; TODO: These should use LSHR instead of BFE_UINT
1380; TODO: This should use DST, but for some reason there are redundant MOVs
; Test kernel - loads <8 x i16> from %in (addrspace(4)), zero-extends every
; element to i32 and stores the <8 x i32> result to %out (addrspace(1)).
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file) - regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; Expected shape on the three GCN run lines - after the kernel-argument load,
; one s_load_dwordx4 fetches the vector, each loaded dword is split with
; s_lshr_b32 by 16 (high half) and s_and_b32 with 0xffff (low half), and the
; result is written with two dwordx4 stores.
; Expected shape on the r600 run line - one VTX_READ_128, LSHR / AND_INT for
; the two halves, and two MEM_RAT_CACHELESS stores.
1381define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1382; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
1383; GCN-NOHSA-SI:       ; %bb.0:
1384; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1385; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1386; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1387; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1388; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1389; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1390; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s5, 16
1391; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s4, 16
1392; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s7, 16
1393; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s6, 16
1394; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1395; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
1396; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
1397; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1398; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1399; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1400; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1401; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1402; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1403; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1404; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1405; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1406; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1407; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1408; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1409; GCN-NOHSA-SI-NEXT:    s_endpgm
1410;
1411; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32:
1412; GCN-HSA:       ; %bb.0:
1413; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1414; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1415; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1416; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1417; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
1418; GCN-HSA-NEXT:    s_lshr_b32 s9, s4, 16
1419; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
1420; GCN-HSA-NEXT:    s_lshr_b32 s3, s6, 16
1421; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
1422; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
1423; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
1424; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
1425; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1426; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1427; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1428; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1429; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1430; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1431; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1432; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1433; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1434; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1435; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1436; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1437; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1438; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1439; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1440; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1441; GCN-HSA-NEXT:    s_endpgm
1442;
1443; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32:
1444; GCN-NOHSA-VI:       ; %bb.0:
1445; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1446; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1447; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1448; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1449; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1450; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1451; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s7, 16
1452; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
1453; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s6, 16
1454; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
1455; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s5, 16
1456; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
1457; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s4, 16
1458; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
1459; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1460; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
1461; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1462; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s10
1463; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1464; GCN-NOHSA-VI-NEXT:    s_nop 0
1465; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1466; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1467; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1468; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1469; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1470; GCN-NOHSA-VI-NEXT:    s_endpgm
1471;
1472; EG-LABEL: constant_zextload_v8i16_to_v8i32:
1473; EG:       ; %bb.0:
1474; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1475; EG-NEXT:    TEX 0 @6
1476; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1477; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1478; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1479; EG-NEXT:    CF_END
1480; EG-NEXT:    Fetch clause starting at 6:
1481; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1482; EG-NEXT:    ALU clause starting at 8:
1483; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1484; EG-NEXT:    ALU clause starting at 9:
1485; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1486; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1487; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1488; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1489; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1490; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1491; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1492; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1493; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1494; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1495; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1496; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1497; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1498; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1499; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1500; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1501; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1502; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test - the lines above are the expected llc output per run line.
1503  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1504  %ext = zext <8 x i16> %load to <8 x i32>
1505  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1506  ret void
1507}
1508
1509; v8i16 is naturally 16-byte aligned
1510; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
1511; TODO: This should use DST, but for some reason there are redundant MOVs
; Test kernel - loads <8 x i16> from %in (addrspace(4)), sign-extends every
; element to i32 and stores the <8 x i32> result to %out (addrspace(1)).
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file) - regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; Expected shape on the three GCN run lines - after the kernel-argument load,
; one s_load_dwordx4 fetches the vector, each loaded dword is split with
; s_ashr_i32 by 16 (high half) and s_sext_i32_i16 (low half), and the result
; is written with two dwordx4 stores.
; Expected shape on the r600 run line - one VTX_READ_128, BFE_INT for the low
; halves, LSHR followed by BFE_INT for the high halves, and two
; MEM_RAT_CACHELESS stores.
1512define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
1513; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
1514; GCN-NOHSA-SI:       ; %bb.0:
1515; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1516; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1517; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1518; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1519; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1520; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1521; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s5, 16
1522; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s9, s4, 16
1523; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1524; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s10, s7, 16
1525; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s11, s6, 16
1526; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1527; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1528; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1529; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1530; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1531; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1532; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1533; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1534; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1535; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1536; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1537; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1538; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1539; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1540; GCN-NOHSA-SI-NEXT:    s_endpgm
1541;
1542; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32:
1543; GCN-HSA:       ; %bb.0:
1544; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1545; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1546; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1547; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1548; GCN-HSA-NEXT:    s_ashr_i32 s8, s5, 16
1549; GCN-HSA-NEXT:    s_ashr_i32 s9, s4, 16
1550; GCN-HSA-NEXT:    s_ashr_i32 s2, s7, 16
1551; GCN-HSA-NEXT:    s_ashr_i32 s3, s6, 16
1552; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1553; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1554; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1555; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1556; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1557; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1558; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1559; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1560; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1561; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1562; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1563; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1564; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1565; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1566; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1567; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1568; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1569; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1570; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1571; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1572; GCN-HSA-NEXT:    s_endpgm
1573;
1574; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32:
1575; GCN-NOHSA-VI:       ; %bb.0:
1576; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1577; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1578; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1579; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1580; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1581; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1582; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s10, s7, 16
1583; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s11, s6, 16
1584; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
1585; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
1586; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s8, s5, 16
1587; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s9, s4, 16
1588; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1589; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1590; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1591; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
1592; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1593; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s10
1594; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1595; GCN-NOHSA-VI-NEXT:    s_nop 0
1596; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1597; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1598; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1599; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1600; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1601; GCN-NOHSA-VI-NEXT:    s_endpgm
1602;
1603; EG-LABEL: constant_sextload_v8i16_to_v8i32:
1604; EG:       ; %bb.0:
1605; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1606; EG-NEXT:    TEX 0 @6
1607; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
1608; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1609; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1610; EG-NEXT:    CF_END
1611; EG-NEXT:    Fetch clause starting at 6:
1612; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1613; EG-NEXT:    ALU clause starting at 8:
1614; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1615; EG-NEXT:    ALU clause starting at 9:
1616; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
1617; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1618; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
1619; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
1620; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
1621; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1622; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
1623; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
1624; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
1625; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1626; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1627; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
1628; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
1629; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
1630; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
1631; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1632; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1633; EG-NEXT:     LSHR T10.X, PS, literal.x,
1634; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
1635; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test - the lines above are the expected llc output per run line.
1636  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
1637  %ext = sext <8 x i16> %load to <8 x i32>
1638  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1639  ret void
1640}
1641
; Test kernel - loads <16 x i16> from %in (addrspace(4)), zero-extends every
; element to i32 and stores the <16 x i32> result to %out (addrspace(1)).
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file) - regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; Expected shape on the three GCN run lines - after the kernel-argument load,
; one s_load_dwordx8 fetches the vector, each loaded dword is split with
; s_lshr_b32 by 16 (high half) and s_and_b32 with 0xffff (low half), and the
; result is written with four dwordx4 stores (offsets 48, 32, 16 and 0).
; Expected shape on the r600 run line - two VTX_READ_128 fetches, LSHR /
; AND_INT for the two halves, and four MEM_RAT_CACHELESS stores.
1642define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1643; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
1644; GCN-NOHSA-SI:       ; %bb.0:
1645; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1646; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1647; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1648; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1649; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1650; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1651; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s5, 16
1652; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s4, 16
1653; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s7, 16
1654; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s6, 16
1655; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s9, 16
1656; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s8, 16
1657; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s11, 16
1658; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s10, 16
1659; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1660; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1661; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
1662; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
1663; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
1664; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
1665; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
1666; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
1667; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
1668; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
1669; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
1670; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
1671; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1672; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1673; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
1674; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
1675; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
1676; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
1677; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1678; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1679; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1680; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
1681; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1682; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
1683; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1684; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
1687; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1688; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
1689; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1690; GCN-NOHSA-SI-NEXT:    s_endpgm
1691;
1692; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
1693; GCN-HSA:       ; %bb.0:
1694; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1695; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1696; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1697; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1698; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
1699; GCN-HSA-NEXT:    s_lshr_b32 s13, s4, 16
1700; GCN-HSA-NEXT:    s_lshr_b32 s14, s7, 16
1701; GCN-HSA-NEXT:    s_lshr_b32 s15, s6, 16
1702; GCN-HSA-NEXT:    s_lshr_b32 s16, s9, 16
1703; GCN-HSA-NEXT:    s_lshr_b32 s17, s8, 16
1704; GCN-HSA-NEXT:    s_lshr_b32 s2, s11, 16
1705; GCN-HSA-NEXT:    s_lshr_b32 s3, s10, 16
1706; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
1707; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
1708; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
1709; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
1710; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
1711; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
1712; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
1713; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
1714; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1715; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1716; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1717; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1718; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1719; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1720; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1721; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
1722; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1723; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1724; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1725; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1726; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1727; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1728; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1729; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
1730; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1731; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
1732; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1733; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1734; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1735; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1736; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
1737; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1738; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
1739; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1740; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1741; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1742; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1743; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
1744; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1745; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
1746; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1747; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1748; GCN-HSA-NEXT:    s_endpgm
1749;
1750; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32:
1751; GCN-NOHSA-VI:       ; %bb.0:
1752; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1753; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1754; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1755; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1756; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1757; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1758; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s11, 16
1759; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
1760; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s10, 16
1761; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
1762; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s9, 16
1763; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
1764; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s8, 16
1765; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
1766; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
1767; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
1768; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
1769; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
1770; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s7, 16
1771; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
1772; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s6, 16
1773; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
1774; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1775; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
1776; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
1777; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
1778; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
1779; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
1780; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
1781; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s4, 16
1782; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
1783; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1784; GCN-NOHSA-VI-NEXT:    s_nop 0
1785; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1786; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
1787; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1788; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
1789; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1790; GCN-NOHSA-VI-NEXT:    s_nop 0
1791; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1792; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
1793; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1794; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
1795; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1796; GCN-NOHSA-VI-NEXT:    s_endpgm
1797;
1798; EG-LABEL: constant_zextload_v16i16_to_v16i32:
1799; EG:       ; %bb.0:
1800; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1801; EG-NEXT:    TEX 1 @8
1802; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
1803; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
1804; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
1805; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
1806; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
1807; EG-NEXT:    CF_END
1808; EG-NEXT:    Fetch clause starting at 8:
1809; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
1810; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
1811; EG-NEXT:    ALU clause starting at 12:
1812; EG-NEXT:     MOV * T11.X, KC0[2].Z,
1813; EG-NEXT:    ALU clause starting at 13:
1814; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
1815; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1816; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
1817; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1818; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
1819; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
1820; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1821; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
1822; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
1823; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
1824; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1825; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
1826; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
1827; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1828; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
1829; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
1830; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1831; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1832; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
1833; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
1834; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
1835; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
1836; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1837; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1838; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
1839; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1840; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
1841; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
1842; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
1843; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
1844; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1845; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1846; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1847; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
1848; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
1849; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test - the lines above are the expected llc output per run line.
1850  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
1851  %ext = zext <16 x i16> %load to <16 x i32>
1852  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
1853  ret void
1854}
1855
; Test kernel - loads <16 x i16> from %in (addrspace(4)), sign-extends every
; element to i32 and stores the <16 x i32> result to %out (addrspace(1)).
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file) - regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; Expected shape on the three GCN run lines - after the kernel-argument load,
; one s_load_dwordx8 fetches the vector, each loaded dword is split with
; s_ashr_i32 by 16 (high half) and s_sext_i32_i16 (low half), and the result
; is written with four dwordx4 stores (offsets 48, 32, 16 and 0).
; Expected shape on the r600 run line - two VTX_READ_128 fetches, BFE_INT for
; the low halves, LSHR followed by BFE_INT for the high halves, and four
; MEM_RAT_CACHELESS stores.
1856define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
1857; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
1858; GCN-NOHSA-SI:       ; %bb.0:
1859; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1860; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1861; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1862; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1863; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1864; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1865; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s12, s5, 16
1866; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s13, s4, 16
1867; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1868; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1869; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s14, s7, 16
1870; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s15, s6, 16
1871; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1872; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1873; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s16, s9, 16
1874; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s17, s8, 16
1875; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
1876; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s11, 16
1877; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s10, 16
1878; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
1879; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
1880; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
1881; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
1882; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
1883; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
1884; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
1885; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1886; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1887; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
1888; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
1889; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
1890; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
1891; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1892; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1893; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1894; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
1895; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1896; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
1897; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1898; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1899; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1900; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
1901; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1902; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
1903; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1904; GCN-NOHSA-SI-NEXT:    s_endpgm
1905;
1906; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
1907; GCN-HSA:       ; %bb.0:
1908; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1909; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1910; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1911; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1912; GCN-HSA-NEXT:    s_ashr_i32 s12, s5, 16
1913; GCN-HSA-NEXT:    s_ashr_i32 s13, s4, 16
1914; GCN-HSA-NEXT:    s_ashr_i32 s14, s7, 16
1915; GCN-HSA-NEXT:    s_ashr_i32 s15, s6, 16
1916; GCN-HSA-NEXT:    s_ashr_i32 s16, s9, 16
1917; GCN-HSA-NEXT:    s_ashr_i32 s17, s8, 16
1918; GCN-HSA-NEXT:    s_ashr_i32 s2, s11, 16
1919; GCN-HSA-NEXT:    s_ashr_i32 s3, s10, 16
1920; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1921; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
1922; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1923; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1924; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1925; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
1926; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
1927; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1928; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
1929; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
1930; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
1931; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1932; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1933; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1934; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
1935; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
1936; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1937; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1938; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
1939; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
1940; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
1941; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
1942; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1943; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1944; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1945; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1946; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1947; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1948; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
1949; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1950; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
1951; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1952; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1953; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1954; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1955; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1956; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1957; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
1958; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1959; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
1960; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1961; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1962; GCN-HSA-NEXT:    s_endpgm
1963;
1964; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32:
1965; GCN-NOHSA-VI:       ; %bb.0:
1966; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1967; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1968; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1969; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1970; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1971; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1972; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s18, s11, 16
1973; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s19, s10, 16
1974; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
1975; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
1976; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s16, s9, 16
1977; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s17, s8, 16
1978; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
1979; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
1980; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
1981; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
1982; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
1983; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
1984; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s14, s7, 16
1985; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s15, s6, 16
1986; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
1987; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
1988; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1989; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s12, s5, 16
1990; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
1991; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
1992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
1993; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
1994; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s13, s4, 16
1995; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1996; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1997; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1998; GCN-NOHSA-VI-NEXT:    s_nop 0
1999; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
2003; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2004; GCN-NOHSA-VI-NEXT:    s_nop 0
2005; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2006; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
2007; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2008; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
2009; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2010; GCN-NOHSA-VI-NEXT:    s_endpgm
2011;
2012; EG-LABEL: constant_sextload_v16i16_to_v16i32:
2013; EG:       ; %bb.0:
2014; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2015; EG-NEXT:    TEX 1 @8
2016; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2017; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2018; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2019; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2020; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2021; EG-NEXT:    CF_END
2022; EG-NEXT:    Fetch clause starting at 8:
2023; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2024; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2025; EG-NEXT:    ALU clause starting at 12:
2026; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2027; EG-NEXT:    ALU clause starting at 13:
2028; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2029; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2030; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2031; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2032; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2033; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2034; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2035; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2036; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2037; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2038; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2039; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2040; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2041; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2042; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2043; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2044; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2045; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2046; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2047; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2048; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2049; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2050; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2051; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2052; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2053; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2054; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2055; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2056; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2057; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2058; EG-NEXT:     LSHR T11.X, PS, literal.x,
2059; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2060; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2061; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2062; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2063; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2064; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2065; EG-NEXT:     LSHR T12.X, PS, literal.x,
2066; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2067; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test - the lines above are the expected llc output per run line.
2068  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
2069  %ext = sext <16 x i16> %load to <16 x i32>
2070  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2071  ret void
2072}
2073
; Test: load a <32 x i16> vector through the addrspace(4) pointer %in,
; zero-extend every element to i32, and store the <32 x i32> result through
; the addrspace(1) pointer %out.
;
; The check lines pin the exact llc output for each RUN-line prefix:
;   * GCN-NOHSA-SI / GCN-HSA / GCN-NOHSA-VI: one s_load_dwordx16 for the data,
;     then for each loaded dword an s_lshr_b32 by 16 (high half) and an
;     s_and_b32 with 0xffff (low half), written out with eight dwordx4 stores
;     (buffer_store_dwordx4 for the NOHSA prefixes, flat_store_dwordx4 for HSA).
;   * EG: four VTX_READ_128 fetches, LSHR/AND_INT pairs, and eight
;     MEM_RAT_CACHELESS STORE_RAW writes.
;
; NOTE: the assertions are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2074define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
2075; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
2076; GCN-NOHSA-SI:       ; %bb.0:
2077; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2078; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2079; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2080; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2081; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s1, 16
2082; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
2083; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s3, 16
2084; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s2, 16
2085; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s5, 16
2086; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s4, 16
2087; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s7, 16
2088; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s6, 16
2089; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s9, 16
2090; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s8, 16
2091; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s11, 16
2092; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s10, 16
2093; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s13, 16
2094; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s12, 16
2095; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s15, 16
2096; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s14, 16
2097; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s1, 0xffff
2098; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s0, 0xffff
2099; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s3, 0xffff
2100; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s2, 0xffff
2101; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
2102; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
2103; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
2104; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
2105; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
2106; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
2107; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
2108; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
2109; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
2110; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
2111; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
2112; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
2113; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2114; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2115; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2116; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2117; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2118; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2119; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2120; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2121; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2122; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2123; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2124; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2125; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2126; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2127; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2128; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2129; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2130; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2131; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2132; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2133; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2134; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2135; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2136; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2137; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2138; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2139; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2140; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2141; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2142; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
2143; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2144; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
2145; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2146; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2147; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2148; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
2149; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
2151; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2152; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
2154; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
2155; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s37
2156; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
2157; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2158; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2159; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
2160; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2161; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
2162; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2163; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2164; GCN-NOHSA-SI-NEXT:    s_endpgm
2165;
2166; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
2167; GCN-HSA:       ; %bb.0:
2168; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2169; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2170; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2171; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2172; GCN-HSA-NEXT:    s_lshr_b32 s20, s5, 16
2173; GCN-HSA-NEXT:    s_lshr_b32 s21, s4, 16
2174; GCN-HSA-NEXT:    s_lshr_b32 s22, s7, 16
2175; GCN-HSA-NEXT:    s_lshr_b32 s23, s6, 16
2176; GCN-HSA-NEXT:    s_lshr_b32 s24, s9, 16
2177; GCN-HSA-NEXT:    s_lshr_b32 s25, s8, 16
2178; GCN-HSA-NEXT:    s_lshr_b32 s26, s11, 16
2179; GCN-HSA-NEXT:    s_lshr_b32 s27, s10, 16
2180; GCN-HSA-NEXT:    s_lshr_b32 s28, s13, 16
2181; GCN-HSA-NEXT:    s_lshr_b32 s29, s12, 16
2182; GCN-HSA-NEXT:    s_lshr_b32 s30, s15, 16
2183; GCN-HSA-NEXT:    s_lshr_b32 s31, s14, 16
2184; GCN-HSA-NEXT:    s_lshr_b32 s33, s17, 16
2185; GCN-HSA-NEXT:    s_lshr_b32 s34, s16, 16
2186; GCN-HSA-NEXT:    s_lshr_b32 s35, s19, 16
2187; GCN-HSA-NEXT:    s_lshr_b32 s36, s18, 16
2188; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
2189; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
2190; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
2191; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
2192; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
2193; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
2194; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
2195; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
2196; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
2197; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
2198; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
2199; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
2200; GCN-HSA-NEXT:    s_and_b32 s17, s17, 0xffff
2201; GCN-HSA-NEXT:    s_and_b32 s16, s16, 0xffff
2202; GCN-HSA-NEXT:    s_and_b32 s19, s19, 0xffff
2203; GCN-HSA-NEXT:    s_and_b32 s18, s18, 0xffff
2204; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2205; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2206; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2207; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2208; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2209; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2210; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
2211; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
2212; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2213; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
2214; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
2215; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
2216; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
2217; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2218; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s34
2219; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2220; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s17
2221; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s33
2222; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2223; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2224; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
2225; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2226; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2227; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
2228; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
2229; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
2230; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
2231; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2232; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2233; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2234; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2235; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2236; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2237; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
2238; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
2239; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
2240; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2241; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2242; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2243; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2244; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2245; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2246; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
2247; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2248; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
2249; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2250; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2251; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2252; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2253; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2254; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2255; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
2256; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2257; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
2258; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2259; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2260; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2261; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2262; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
2263; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2264; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
2265; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2266; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2267; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2268; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2269; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
2270; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2271; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
2272; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2273; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2274; GCN-HSA-NEXT:    s_endpgm
2275;
2276; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32:
2277; GCN-NOHSA-VI:       ; %bb.0:
2278; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
2279; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2280; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2281; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
2282; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
2283; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2284; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s15, 16
2285; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s14, 16
2286; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s15, 0xffff
2287; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, 0xffff
2288; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s13, 16
2289; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s12, 16
2290; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
2291; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
2292; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2293; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s36
2294; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2295; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
2296; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s11, 16
2297; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s10, 16
2298; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
2299; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
2300; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
2301; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s9, 16
2302; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2303; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
2304; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2305; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
2306; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s8, 16
2307; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
2308; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
2309; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
2310; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s7, 16
2311; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2312; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
2313; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2314; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
2315; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s6, 16
2316; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
2317; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
2318; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
2319; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s5, 16
2320; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2321; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
2322; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2323; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
2324; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s4, 16
2325; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
2326; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
2327; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
2328; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s3, 16
2329; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2330; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
2331; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2332; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
2333; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s2, 16
2334; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
2335; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
2336; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
2337; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s1, 16
2338; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2339; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
2340; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
2342; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s0, 16
2343; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s1, 0xffff
2344; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s0, 0xffff
2345; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
2346; GCN-NOHSA-VI-NEXT:    s_nop 0
2347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
2348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
2349; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
2351; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
2352; GCN-NOHSA-VI-NEXT:    s_nop 0
2353; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
2354; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
2355; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
2356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
2357; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
2358; GCN-NOHSA-VI-NEXT:    s_endpgm
2359;
2360; EG-LABEL: constant_zextload_v32i16_to_v32i32:
2361; EG:       ; %bb.0:
2362; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2363; EG-NEXT:    TEX 3 @12
2364; EG-NEXT:    ALU 71, @21, KC0[CB0:0-32], KC1[]
2365; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
2366; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2367; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
2368; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
2369; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
2370; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
2371; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
2372; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
2373; EG-NEXT:    CF_END
2374; EG-NEXT:    Fetch clause starting at 12:
2375; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2376; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2377; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2378; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2379; EG-NEXT:    ALU clause starting at 20:
2380; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2381; EG-NEXT:    ALU clause starting at 21:
2382; EG-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2383; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2384; EG-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2385; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2386; EG-NEXT:     LSHR T23.Y, T20.X, literal.x,
2387; EG-NEXT:     LSHR * T24.W, T20.W, literal.x,
2388; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2389; EG-NEXT:     AND_INT T23.X, T20.X, literal.x,
2390; EG-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2391; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.y,
2392; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2393; EG-NEXT:     LSHR T24.Y, T20.Z, literal.x,
2394; EG-NEXT:     LSHR * T25.W, T19.Y, literal.x,
2395; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2396; EG-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2397; EG-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2398; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2399; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2400; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2401; EG-NEXT:     LSHR T25.Y, T19.X, literal.y,
2402; EG-NEXT:     LSHR T27.W, T19.W, literal.y,
2403; EG-NEXT:     AND_INT * T25.X, T19.X, literal.z,
2404; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2405; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2406; EG-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2407; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2408; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2409; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
2410; EG-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2411; EG-NEXT:     LSHR T28.W, T22.Y, literal.y,
2412; EG-NEXT:     AND_INT * T27.X, T19.Z, literal.z,
2413; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2414; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2415; EG-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2416; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2417; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2418; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2419; EG-NEXT:     LSHR T28.Y, T22.X, literal.y,
2420; EG-NEXT:     LSHR T30.W, T22.W, literal.y,
2421; EG-NEXT:     AND_INT * T28.X, T22.X, literal.z,
2422; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2423; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2424; EG-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2425; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2426; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2427; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
2428; EG-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2429; EG-NEXT:     LSHR T31.W, T21.Y, literal.y,
2430; EG-NEXT:     AND_INT * T30.X, T22.Z, literal.z,
2431; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2432; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2433; EG-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
2434; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2435; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2436; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2437; EG-NEXT:     LSHR T31.Y, T21.X, literal.y,
2438; EG-NEXT:     LSHR T33.W, T21.W, literal.y,
2439; EG-NEXT:     AND_INT * T31.X, T21.X, literal.z,
2440; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2441; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2442; EG-NEXT:     AND_INT T33.Z, T21.W, literal.x,
2443; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2444; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2445; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2446; EG-NEXT:     LSHR T33.Y, T21.Z, literal.y,
2447; EG-NEXT:     AND_INT * T33.X, T21.Z, literal.z,
2448; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2449; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2450; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2451; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2452; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2453; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: vector load, lane-wise zext from i16 to i32, vector store.
2454  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2455  %ext = zext <32 x i16> %load to <32 x i32>
2456  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2457  ret void
2458}
2459
2460define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
2461; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
2462; GCN-NOHSA-SI:       ; %bb.0:
2463; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2464; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2465; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2466; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2467; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s1, 16
2468; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s0, 16
2469; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s1
2470; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s0
2471; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s22, s3, 16
2472; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s2, 16
2473; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s3
2474; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s2
2475; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s26, s5, 16
2476; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s27, s4, 16
2477; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
2478; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
2479; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s28, s7, 16
2480; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s29, s6, 16
2481; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
2482; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
2483; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s30, s9, 16
2484; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s31, s8, 16
2485; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
2486; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
2487; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s11, 16
2488; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s10, 16
2489; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
2490; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
2491; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s13, 16
2492; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s36, s12, 16
2493; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
2494; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
2495; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s37, s15, 16
2496; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s14, 16
2497; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
2498; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
2499; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2500; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2501; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2502; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2503; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2504; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
2505; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2506; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
2507; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2508; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2509; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2510; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
2511; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2512; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
2513; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2514; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2515; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2516; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2517; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2518; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2519; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2520; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2521; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2522; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2523; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2524; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2525; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2526; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2527; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2528; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2529; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2530; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2531; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2532; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2533; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2534; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2535; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2536; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2537; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2538; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2539; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s25
2540; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
2541; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
2542; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
2543; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2544; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2545; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s21
2546; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
2548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2549; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2550; GCN-NOHSA-SI-NEXT:    s_endpgm
2551;
2552; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
2553; GCN-HSA:       ; %bb.0:
2554; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2555; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2556; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
2557; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2558; GCN-HSA-NEXT:    s_ashr_i32 s20, s5, 16
2559; GCN-HSA-NEXT:    s_ashr_i32 s21, s4, 16
2560; GCN-HSA-NEXT:    s_ashr_i32 s22, s7, 16
2561; GCN-HSA-NEXT:    s_ashr_i32 s23, s6, 16
2562; GCN-HSA-NEXT:    s_ashr_i32 s24, s9, 16
2563; GCN-HSA-NEXT:    s_ashr_i32 s25, s8, 16
2564; GCN-HSA-NEXT:    s_ashr_i32 s26, s11, 16
2565; GCN-HSA-NEXT:    s_ashr_i32 s27, s10, 16
2566; GCN-HSA-NEXT:    s_ashr_i32 s28, s13, 16
2567; GCN-HSA-NEXT:    s_ashr_i32 s29, s12, 16
2568; GCN-HSA-NEXT:    s_ashr_i32 s30, s15, 16
2569; GCN-HSA-NEXT:    s_ashr_i32 s31, s14, 16
2570; GCN-HSA-NEXT:    s_ashr_i32 s33, s17, 16
2571; GCN-HSA-NEXT:    s_ashr_i32 s34, s16, 16
2572; GCN-HSA-NEXT:    s_ashr_i32 s35, s19, 16
2573; GCN-HSA-NEXT:    s_ashr_i32 s36, s18, 16
2574; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2575; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2576; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2577; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2578; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2579; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2580; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
2581; GCN-HSA-NEXT:    s_sext_i32_i16 s16, s16
2582; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s19
2583; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s18
2584; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
2585; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2586; GCN-HSA-NEXT:    s_sext_i32_i16 s17, s17
2587; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
2588; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
2589; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
2590; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
2591; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2592; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s34
2593; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2594; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s17
2595; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s33
2596; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2597; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2598; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
2599; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2600; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
2601; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2602; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
2603; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
2604; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
2605; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
2606; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
2607; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2608; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2609; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2610; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
2611; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
2612; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2613; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2614; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2615; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
2616; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
2617; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
2618; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2619; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2620; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2621; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
2622; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
2623; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2624; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2625; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2626; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
2627; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2628; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
2629; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2630; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2631; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2632; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
2633; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
2634; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2635; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2636; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2637; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
2638; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2639; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
2640; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2641; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
2642; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
2643; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2644; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2645; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2646; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
2647; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2648; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
2649; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2650; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
2651; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
2652; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2653; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2654; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2655; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
2656; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2657; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
2658; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2659; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2660; GCN-HSA-NEXT:    s_endpgm
2661;
2662; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32:
2663; GCN-NOHSA-VI:       ; %bb.0:
2664; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
2665; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2666; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2667; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
2668; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
2669; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2670; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s15, 16
2671; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s36, s14, 16
2672; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
2673; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
2674; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s13, 16
2675; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s12, 16
2676; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
2677; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
2678; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2679; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s36
2680; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
2681; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
2682; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s30, s11, 16
2683; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s31, s10, 16
2684; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
2685; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
2686; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
2687; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s28, s9, 16
2688; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2689; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
2690; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2691; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
2692; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s29, s8, 16
2693; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
2694; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2695; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
2696; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s26, s7, 16
2697; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2698; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
2699; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2700; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
2701; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s27, s6, 16
2702; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2703; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2704; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
2705; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s24, s5, 16
2706; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2707; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
2708; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2709; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
2710; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s25, s4, 16
2711; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2712; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2713; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
2714; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s22, s3, 16
2715; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2716; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
2717; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2718; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
2719; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s23, s2, 16
2720; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s3, s3
2721; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
2722; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
2723; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s20, s1, 16
2724; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2725; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
2726; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2727; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
2728; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s21, s0, 16
2729; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s1, s1
2730; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s0, s0
2731; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
2732; GCN-NOHSA-VI-NEXT:    s_nop 0
2733; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
2734; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
2735; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2736; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
2737; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
2738; GCN-NOHSA-VI-NEXT:    s_nop 0
2739; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
2740; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
2741; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
2742; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s20
2743; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
2744; GCN-NOHSA-VI-NEXT:    s_endpgm
2745;
2746; EG-LABEL: constant_sextload_v32i16_to_v32i32:
2747; EG:       ; %bb.0:
2748; EG-NEXT:    ALU 8, @20, KC0[CB0:0-32], KC1[]
2749; EG-NEXT:    TEX 3 @12
2750; EG-NEXT:    ALU 73, @29, KC0[CB0:0-32], KC1[]
2751; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0
2752; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0
2753; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0
2754; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2755; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0
2756; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2757; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
2758; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
2759; EG-NEXT:    CF_END
2760; EG-NEXT:    Fetch clause starting at 12:
2761; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
2762; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
2763; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
2764; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
2765; EG-NEXT:    ALU clause starting at 20:
2766; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
2767; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2768; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2769; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
2770; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2771; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
2772; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2773; EG-NEXT:     MOV * T22.X, KC0[2].Z,
2774; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2775; EG-NEXT:    ALU clause starting at 29:
2776; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2777; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2778; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2779; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2780; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
2781; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
2782; EG-NEXT:     LSHR T0.W, T22.W, literal.y,
2783; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2784; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2785; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
2786; EG-NEXT:     LSHR T28.X, PS, literal.x,
2787; EG-NEXT:     LSHR T0.Y, T22.Y, literal.y,
2788; EG-NEXT:     BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212
2789; EG-NEXT:     LSHR T1.W, T24.W, literal.y,
2790; EG-NEXT:     LSHR * T2.W, T24.Y, literal.y,
2791; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2792; EG-NEXT:     BFE_INT T29.X, T25.X, 0.0, literal.x,
2793; EG-NEXT:     LSHR T1.Y, T23.W, literal.x,
2794; EG-NEXT:     BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212
2795; EG-NEXT:     LSHR T3.W, T23.Y, literal.x,
2796; EG-NEXT:     LSHR * T4.W, T25.Y, literal.x,
2797; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2798; EG-NEXT:     BFE_INT T30.X, T25.Z, 0.0, literal.x,
2799; EG-NEXT:     LSHR T2.Y, T25.W, literal.x,
2800; EG-NEXT:     BFE_INT T31.Z, T23.Y, 0.0, literal.x,
2801; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
2802; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
2803; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2804; EG-NEXT:     BFE_INT T31.X, T23.X, 0.0, literal.x,
2805; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
2806; EG-NEXT:     BFE_INT T32.Z, T23.W, 0.0, literal.x,
2807; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
2808; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
2809; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2810; EG-NEXT:     BFE_INT T32.X, T23.Z, 0.0, literal.x,
2811; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
2812; EG-NEXT:     BFE_INT T25.Z, T24.Y, 0.0, literal.x,
2813; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x,
2814; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
2815; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2816; EG-NEXT:     BFE_INT T25.X, T24.X, 0.0, literal.x,
2817; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
2818; EG-NEXT:     BFE_INT T33.Z, T24.W, 0.0, literal.x,
2819; EG-NEXT:     BFE_INT T32.W, T1.Y, 0.0, literal.x,
2820; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
2821; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2822; EG-NEXT:     BFE_INT T33.X, T24.Z, 0.0, literal.x,
2823; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
2824; EG-NEXT:     BFE_INT T23.Z, T22.Y, 0.0, literal.x,
2825; EG-NEXT:     BFE_INT T25.W, T2.W, 0.0, literal.x,
2826; EG-NEXT:     LSHR * T2.W, T24.X, literal.x,
2827; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2828; EG-NEXT:     BFE_INT T23.X, T22.X, 0.0, literal.x,
2829; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
2830; EG-NEXT:     BFE_INT T34.Z, T22.W, 0.0, literal.x,
2831; EG-NEXT:     BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
2832; EG-NEXT:     LSHR * T1.W, T24.Z, literal.x,
2833; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2834; EG-NEXT:     BFE_INT T34.X, T22.Z, 0.0, literal.x,
2835; EG-NEXT:     BFE_INT T33.Y, PS, 0.0, literal.x,
2836; EG-NEXT:     LSHR T0.Z, T22.X, literal.x,
2837; EG-NEXT:     BFE_INT T23.W, T0.Y, 0.0, literal.x,
2838; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2839; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
2840; EG-NEXT:     LSHR T22.X, PS, literal.x,
2841; EG-NEXT:     BFE_INT T23.Y, PV.Z, 0.0, literal.y,
2842; EG-NEXT:     LSHR T0.Z, T22.Z, literal.y,
2843; EG-NEXT:     BFE_INT T34.W, T0.W, 0.0, literal.y,
2844; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2845; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2846; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2847; EG-NEXT:     LSHR T24.X, PS, literal.x,
2848; EG-NEXT:     BFE_INT * T34.Y, PV.Z, 0.0, literal.y,
2849; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2850  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
2851  %ext = sext <32 x i16> %load to <32 x i32>
2852  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
2853  ret void
2854}
2855
2856define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
2857; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
2858; GCN-NOHSA-SI:       ; %bb.0:
2859; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
2860; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2861; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2862; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[36:51], s[18:19], 0x10
2863; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2864; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s1, 16
2865; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
2866; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s3, 16
2867; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s2, 16
2868; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s5, 16
2869; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s4, 16
2870; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s7, 16
2871; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s6, 16
2872; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s9, 16
2873; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s8, 16
2874; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s11, 16
2875; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s10, 16
2876; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s13, 16
2877; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s12, 16
2878; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s15, 16
2879; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s14, 16
2880; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s1, 0xffff
2881; GCN-NOHSA-SI-NEXT:    s_and_b32 s52, s0, 0xffff
2882; GCN-NOHSA-SI-NEXT:    s_and_b32 s53, s3, 0xffff
2883; GCN-NOHSA-SI-NEXT:    s_and_b32 s54, s2, 0xffff
2884; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
2885; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
2886; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
2887; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
2888; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
2889; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
2890; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
2891; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
2892; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
2893; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
2894; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
2895; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
2896; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s55, s37, 16
2897; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s36, 16
2898; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s57, s39, 16
2899; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s38, 16
2900; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s59, s41, 16
2901; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s40, 16
2902; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s61, s43, 16
2903; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s42, 16
2904; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s63, s45, 16
2905; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s64, s44, 16
2906; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s65, s47, 16
2907; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s66, s46, 16
2908; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s67, s49, 16
2909; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s68, s48, 16
2910; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s69, s51, 16
2911; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s70, s50, 16
2912; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s37, 0xffff
2913; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s36, 0xffff
2914; GCN-NOHSA-SI-NEXT:    s_and_b32 s39, s39, 0xffff
2915; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s38, 0xffff
2916; GCN-NOHSA-SI-NEXT:    s_and_b32 s40, s40, 0xffff
2917; GCN-NOHSA-SI-NEXT:    s_and_b32 s43, s43, 0xffff
2918; GCN-NOHSA-SI-NEXT:    s_and_b32 s42, s42, 0xffff
2919; GCN-NOHSA-SI-NEXT:    s_and_b32 s45, s45, 0xffff
2920; GCN-NOHSA-SI-NEXT:    s_and_b32 s44, s44, 0xffff
2921; GCN-NOHSA-SI-NEXT:    s_and_b32 s47, s47, 0xffff
2922; GCN-NOHSA-SI-NEXT:    s_and_b32 s46, s46, 0xffff
2923; GCN-NOHSA-SI-NEXT:    s_and_b32 s49, s49, 0xffff
2924; GCN-NOHSA-SI-NEXT:    s_and_b32 s48, s48, 0xffff
2925; GCN-NOHSA-SI-NEXT:    s_and_b32 s51, s51, 0xffff
2926; GCN-NOHSA-SI-NEXT:    s_and_b32 s50, s50, 0xffff
2927; GCN-NOHSA-SI-NEXT:    s_and_b32 s41, s41, 0xffff
2928; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2929; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2930; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2931; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2932; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s50
2933; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
2934; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s51
2935; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
2936; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s48
2937; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
2938; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s49
2939; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
2940; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s46
2941; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
2942; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s47
2943; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
2944; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s44
2945; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
2946; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s45
2947; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
2948; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s42
2949; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s62
2950; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s43
2951; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s40
2952; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s61
2953; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s60
2954; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s41
2955; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s59
2956; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
2957; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
2958; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
2959; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
2960; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
2961; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
2962; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
2963; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
2964; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s58
2965; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s39
2966; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s57
2967; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
2968; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2969; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
2970; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
2971; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s37
2972; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
2973; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
2974; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2975; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2976; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2977; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2978; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2979; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2980; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2981; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2982; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2983; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2984; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2985; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2986; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2987; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2988; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2989; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2990; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2991; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2992; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2993; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2994; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2995; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2996; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2997; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2998; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2999; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3000; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
3001; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3002; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
3003; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3004; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3005; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3006; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
3007; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3008; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
3009; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3010; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3011; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s54
3012; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
3013; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s53
3014; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
3015; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3016; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3017; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s52
3018; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
3019; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
3020; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
3021; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3022; GCN-NOHSA-SI-NEXT:    s_endpgm
3023;
3024; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
3025; GCN-HSA:       ; %bb.0:
3026; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
3027; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3028; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3029; GCN-HSA-NEXT:    s_load_dwordx16 s[36:51], s[18:19], 0x10
3030; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3031; GCN-HSA-NEXT:    s_lshr_b32 s20, s1, 16
3032; GCN-HSA-NEXT:    s_lshr_b32 s21, s0, 16
3033; GCN-HSA-NEXT:    s_lshr_b32 s22, s3, 16
3034; GCN-HSA-NEXT:    s_lshr_b32 s23, s2, 16
3035; GCN-HSA-NEXT:    s_lshr_b32 s24, s5, 16
3036; GCN-HSA-NEXT:    s_lshr_b32 s25, s4, 16
3037; GCN-HSA-NEXT:    s_lshr_b32 s26, s7, 16
3038; GCN-HSA-NEXT:    s_lshr_b32 s27, s6, 16
3039; GCN-HSA-NEXT:    s_lshr_b32 s28, s9, 16
3040; GCN-HSA-NEXT:    s_lshr_b32 s29, s8, 16
3041; GCN-HSA-NEXT:    s_lshr_b32 s30, s11, 16
3042; GCN-HSA-NEXT:    s_lshr_b32 s31, s10, 16
3043; GCN-HSA-NEXT:    s_lshr_b32 s33, s13, 16
3044; GCN-HSA-NEXT:    s_lshr_b32 s34, s12, 16
3045; GCN-HSA-NEXT:    s_lshr_b32 s35, s15, 16
3046; GCN-HSA-NEXT:    s_lshr_b32 s52, s14, 16
3047; GCN-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
3048; GCN-HSA-NEXT:    s_and_b32 s0, s0, 0xffff
3049; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
3050; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
3051; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
3052; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
3053; GCN-HSA-NEXT:    s_and_b32 s53, s7, 0xffff
3054; GCN-HSA-NEXT:    s_and_b32 s54, s6, 0xffff
3055; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
3056; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
3057; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
3058; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
3059; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
3060; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
3061; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
3062; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
3063; GCN-HSA-NEXT:    s_lshr_b32 s18, s37, 16
3064; GCN-HSA-NEXT:    s_lshr_b32 s19, s36, 16
3065; GCN-HSA-NEXT:    s_lshr_b32 s55, s39, 16
3066; GCN-HSA-NEXT:    s_lshr_b32 s56, s38, 16
3067; GCN-HSA-NEXT:    s_lshr_b32 s57, s41, 16
3068; GCN-HSA-NEXT:    s_lshr_b32 s58, s40, 16
3069; GCN-HSA-NEXT:    s_lshr_b32 s59, s43, 16
3070; GCN-HSA-NEXT:    s_lshr_b32 s60, s42, 16
3071; GCN-HSA-NEXT:    s_lshr_b32 s61, s45, 16
3072; GCN-HSA-NEXT:    s_lshr_b32 s62, s44, 16
3073; GCN-HSA-NEXT:    s_lshr_b32 s63, s47, 16
3074; GCN-HSA-NEXT:    s_lshr_b32 s64, s46, 16
3075; GCN-HSA-NEXT:    s_lshr_b32 s65, s49, 16
3076; GCN-HSA-NEXT:    s_lshr_b32 s66, s48, 16
3077; GCN-HSA-NEXT:    s_lshr_b32 s67, s51, 16
3078; GCN-HSA-NEXT:    s_lshr_b32 s68, s50, 16
3079; GCN-HSA-NEXT:    s_and_b32 s37, s37, 0xffff
3080; GCN-HSA-NEXT:    s_and_b32 s36, s36, 0xffff
3081; GCN-HSA-NEXT:    s_and_b32 s39, s39, 0xffff
3082; GCN-HSA-NEXT:    s_and_b32 s38, s38, 0xffff
3083; GCN-HSA-NEXT:    s_and_b32 s41, s41, 0xffff
3084; GCN-HSA-NEXT:    s_and_b32 s40, s40, 0xffff
3085; GCN-HSA-NEXT:    s_and_b32 s43, s43, 0xffff
3086; GCN-HSA-NEXT:    s_and_b32 s42, s42, 0xffff
3087; GCN-HSA-NEXT:    s_and_b32 s45, s45, 0xffff
3088; GCN-HSA-NEXT:    s_and_b32 s44, s44, 0xffff
3089; GCN-HSA-NEXT:    s_and_b32 s47, s47, 0xffff
3090; GCN-HSA-NEXT:    s_and_b32 s46, s46, 0xffff
3091; GCN-HSA-NEXT:    s_and_b32 s49, s49, 0xffff
3092; GCN-HSA-NEXT:    s_and_b32 s48, s48, 0xffff
3093; GCN-HSA-NEXT:    s_and_b32 s51, s51, 0xffff
3094; GCN-HSA-NEXT:    s_and_b32 s50, s50, 0xffff
3095; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xf0
3096; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3097; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s7
3098; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s6
3099; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xe0
3100; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3101; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s7
3102; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s6
3103; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xd0
3104; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3105; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s7
3106; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s6
3107; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xc0
3108; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3109; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s7
3110; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s6
3111; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xb0
3112; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3113; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s7
3114; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s6
3115; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0xa0
3116; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3117; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s7
3118; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s6
3119; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x90
3120; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s48
3121; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s66
3122; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s49
3123; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s65
3124; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3125; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3126; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s7
3127; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s6
3128; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x80
3129; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3130; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s7
3131; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s6
3132; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x70
3133; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s42
3134; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s60
3135; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s43
3136; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s59
3137; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3138; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3139; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s50
3140; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s7
3141; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
3142; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x60
3143; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3144; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s68
3145; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s51
3146; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s67
3147; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s7
3148; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s46
3149; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s64
3150; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s47
3151; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s63
3152; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s44
3153; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s62
3154; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s45
3155; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s61
3156; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s40
3157; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3158; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s58
3159; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s38
3160; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s41
3161; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
3162; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s56
3163; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s39
3164; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s36
3165; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s55
3166; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s19
3167; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s6
3168; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x50
3169; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3170; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s37
3171; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s14
3172; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3173; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s18
3174; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s12
3175; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s52
3176; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s15
3177; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s35
3178; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s34
3179; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3180; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s13
3181; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s33
3182; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3183; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3184; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3185; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3186; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3187; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3188; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
3189; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
3190; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
3191; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
3192; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3193; GCN-HSA-NEXT:    s_add_u32 s6, s16, 64
3194; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3195; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3196; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3197; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
3198; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
3199; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
3200; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3201; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3202; GCN-HSA-NEXT:    s_add_u32 s6, s16, 48
3203; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3204; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
3205; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3206; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s54
3207; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
3208; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s53
3209; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
3210; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3211; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3212; GCN-HSA-NEXT:    s_nop 0
3213; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
3214; GCN-HSA-NEXT:    s_add_u32 s4, s16, 32
3215; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
3216; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
3217; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3218; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3219; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3220; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3221; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3222; GCN-HSA-NEXT:    s_nop 0
3223; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3224; GCN-HSA-NEXT:    s_add_u32 s2, s16, 16
3225; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
3226; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
3227; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3228; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
3229; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
3230; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3231; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3232; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
3233; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
3234; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3235; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
3236; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3237; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
3238; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3239; GCN-HSA-NEXT:    s_endpgm
3240;
3241; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32:
3242; GCN-NOHSA-VI:       ; %bb.0:
3243; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
3244; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3245; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x40
3246; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x0
3247; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3248; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s69, s31, 16
3249; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s70, s30, 16
3250; GCN-NOHSA-VI-NEXT:    s_and_b32 s31, s31, 0xffff
3251; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s30, 0xffff
3252; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s1, 16
3253; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s0, 16
3254; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s3, 16
3255; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s40, s2, 16
3256; GCN-NOHSA-VI-NEXT:    s_and_b32 s60, s1, 0xffff
3257; GCN-NOHSA-VI-NEXT:    s_and_b32 s61, s0, 0xffff
3258; GCN-NOHSA-VI-NEXT:    s_and_b32 s62, s3, 0xffff
3259; GCN-NOHSA-VI-NEXT:    s_and_b32 s63, s2, 0xffff
3260; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s67, s29, 16
3261; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s68, s28, 16
3262; GCN-NOHSA-VI-NEXT:    s_and_b32 s29, s29, 0xffff
3263; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s28, 0xffff
3264; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3265; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3266; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s36
3267; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s37
3268; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
3269; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s70
3270; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
3271; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s69
3272; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s65, s27, 16
3273; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s66, s26, 16
3274; GCN-NOHSA-VI-NEXT:    s_and_b32 s27, s27, 0xffff
3275; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s26, 0xffff
3276; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3277; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s59, s25, 16
3278; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
3279; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s68
3280; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
3281; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s67
3282; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s64, s24, 16
3283; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s25, 0xffff
3284; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s24, 0xffff
3285; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3286; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s57, s23, 16
3287; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
3288; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
3289; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
3290; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
3291; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s22, 16
3292; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s23, 0xffff
3293; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s22, 0xffff
3294; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
3295; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s55, s21, 16
3296; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
3297; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
3298; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
3299; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
3300; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s20, 16
3301; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s21, 0xffff
3302; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s20, 0xffff
3303; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
3304; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s53, s19, 16
3305; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
3306; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s58
3307; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
3308; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
3309; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s18, 16
3310; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s19, 0xffff
3311; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s18, 0xffff
3312; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
3313; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s51, s17, 16
3314; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
3315; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
3316; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
3317; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
3318; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s16, 16
3319; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s17, 0xffff
3320; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s16, 0xffff
3321; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3322; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s49, s15, 16
3323; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
3324; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s54
3325; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
3326; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
3327; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s50, s14, 16
3328; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s15, 0xffff
3329; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, 0xffff
3330; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3331; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s38, s13, 16
3332; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
3333; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
3334; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
3335; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
3336; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s39, s12, 16
3337; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
3338; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
3339; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3340; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s47, s11, 16
3341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
3342; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s50
3343; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
3344; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
3345; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s48, s10, 16
3346; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
3347; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
3348; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3349; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s45, s9, 16
3350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
3351; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s39
3352; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
3353; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s38
3354; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s8, 16
3355; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
3356; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
3357; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3358; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s43, s7, 16
3359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
3360; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s48
3361; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
3362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
3363; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s6, 16
3364; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
3365; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
3366; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3367; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s41, s5, 16
3368; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
3369; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
3370; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
3371; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
3372; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s42, s4, 16
3373; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
3374; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
3375; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3376; GCN-NOHSA-VI-NEXT:    s_nop 0
3377; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
3378; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
3379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
3380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
3381; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3382; GCN-NOHSA-VI-NEXT:    s_nop 0
3383; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
3384; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
3385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
3386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
3387; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3388; GCN-NOHSA-VI-NEXT:    s_nop 0
3389; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s63
3390; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
3391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s62
3392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
3393; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3394; GCN-NOHSA-VI-NEXT:    s_nop 0
3395; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s61
3396; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
3397; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s60
3398; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
3399; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3400; GCN-NOHSA-VI-NEXT:    s_endpgm
3401;
3402; EG-LABEL: constant_zextload_v64i16_to_v64i32:
3403; EG:       ; %bb.0:
3404; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
3405; EG-NEXT:    TEX 3 @22
3406; EG-NEXT:    ALU 55, @39, KC0[CB0:0-32], KC1[]
3407; EG-NEXT:    TEX 3 @30
3408; EG-NEXT:    ALU 87, @95, KC0[CB0:0-32], KC1[]
3409; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
3410; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
3411; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
3412; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0
3413; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
3414; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0
3415; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
3416; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
3417; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
3418; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 0
3419; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
3420; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0
3421; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
3422; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0
3423; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T43.X, 0
3424; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T36.X, 1
3425; EG-NEXT:    CF_END
3426; EG-NEXT:    Fetch clause starting at 22:
3427; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
3428; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 48, #1
3429; EG-NEXT:     VTX_READ_128 T40.XYZW, T35.X, 32, #1
3430; EG-NEXT:     VTX_READ_128 T41.XYZW, T35.X, 16, #1
3431; EG-NEXT:    Fetch clause starting at 30:
3432; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
3433; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
3434; EG-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 80, #1
3435; EG-NEXT:     VTX_READ_128 T52.XYZW, T35.X, 64, #1
3436; EG-NEXT:    ALU clause starting at 38:
3437; EG-NEXT:     MOV * T35.X, KC0[2].Z,
3438; EG-NEXT:    ALU clause starting at 39:
3439; EG-NEXT:     LSHR * T37.W, T36.Y, literal.x,
3440; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3441; EG-NEXT:     AND_INT * T37.Z, T36.Y, literal.x,
3442; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3443; EG-NEXT:     LSHR T37.Y, T36.X, literal.x,
3444; EG-NEXT:     LSHR * T38.W, T36.W, literal.x,
3445; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3446; EG-NEXT:     AND_INT T37.X, T36.X, literal.x,
3447; EG-NEXT:     AND_INT T38.Z, T36.W, literal.x,
3448; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.y,
3449; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
3450; EG-NEXT:     LSHR T38.Y, T36.Z, literal.x,
3451; EG-NEXT:     LSHR * T42.W, T41.Y, literal.x,
3452; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3453; EG-NEXT:     AND_INT T38.X, T36.Z, literal.x,
3454; EG-NEXT:     AND_INT T42.Z, T41.Y, literal.x,
3455; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3456; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3457; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
3458; EG-NEXT:     LSHR T42.Y, T41.X, literal.y,
3459; EG-NEXT:     LSHR T44.W, T41.W, literal.y,
3460; EG-NEXT:     AND_INT * T42.X, T41.X, literal.z,
3461; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3462; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3463; EG-NEXT:     AND_INT T44.Z, T41.W, literal.x,
3464; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3465; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
3466; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
3467; EG-NEXT:     LSHR T44.Y, T41.Z, literal.y,
3468; EG-NEXT:     LSHR T45.W, T40.Y, literal.y,
3469; EG-NEXT:     AND_INT * T44.X, T41.Z, literal.z,
3470; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3471; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3472; EG-NEXT:     AND_INT T45.Z, T40.Y, literal.x,
3473; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3474; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
3475; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
3476; EG-NEXT:     LSHR T45.Y, T40.X, literal.y,
3477; EG-NEXT:     LSHR T47.W, T40.W, literal.y,
3478; EG-NEXT:     AND_INT * T45.X, T40.X, literal.z,
3479; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3480; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3481; EG-NEXT:     AND_INT T47.Z, T40.W, literal.x,
3482; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3483; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
3484; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
3485; EG-NEXT:     LSHR T47.Y, T40.Z, literal.y,
3486; EG-NEXT:     AND_INT * T47.X, T40.Z, literal.z,
3487; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3488; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3489; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
3490; EG-NEXT:     LSHR * T35.W, T39.Y, literal.y,
3491; EG-NEXT:    80(1.121039e-43), 16(2.242078e-44)
3492; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
3493; EG-NEXT:     AND_INT * T35.Z, T39.Y, literal.y,
3494; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
3495; EG-NEXT:    ALU clause starting at 95:
3496; EG-NEXT:     LSHR T35.Y, T39.X, literal.x,
3497; EG-NEXT:     LSHR * T53.W, T39.W, literal.x,
3498; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3499; EG-NEXT:     AND_INT T35.X, T39.X, literal.x,
3500; EG-NEXT:     AND_INT T53.Z, T39.W, literal.x,
3501; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3502; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
3503; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
3504; EG-NEXT:     LSHR T53.Y, T39.Z, literal.y,
3505; EG-NEXT:     LSHR T54.W, T52.Y, literal.y,
3506; EG-NEXT:     AND_INT * T53.X, T39.Z, literal.z,
3507; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3508; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3509; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.x,
3510; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3511; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
3512; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
3513; EG-NEXT:     LSHR T54.Y, T52.X, literal.y,
3514; EG-NEXT:     LSHR T56.W, T52.W, literal.y,
3515; EG-NEXT:     AND_INT * T54.X, T52.X, literal.z,
3516; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3517; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3518; EG-NEXT:     AND_INT T56.Z, T52.W, literal.x,
3519; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3520; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
3521; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
3522; EG-NEXT:     LSHR T56.Y, T52.Z, literal.y,
3523; EG-NEXT:     LSHR T57.W, T51.Y, literal.y,
3524; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.z,
3525; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3526; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3527; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.x,
3528; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3529; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
3530; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
3531; EG-NEXT:     LSHR T57.Y, T51.X, literal.y,
3532; EG-NEXT:     LSHR T59.W, T51.W, literal.y,
3533; EG-NEXT:     AND_INT * T57.X, T51.X, literal.z,
3534; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3535; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3536; EG-NEXT:     AND_INT T59.Z, T51.W, literal.x,
3537; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3538; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
3539; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
3540; EG-NEXT:     LSHR T59.Y, T51.Z, literal.y,
3541; EG-NEXT:     LSHR T60.W, T50.Y, literal.y,
3542; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.z,
3543; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3544; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3545; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.x,
3546; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3547; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
3548; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
3549; EG-NEXT:     LSHR T60.Y, T50.X, literal.y,
3550; EG-NEXT:     LSHR T62.W, T50.W, literal.y,
3551; EG-NEXT:     AND_INT * T60.X, T50.X, literal.z,
3552; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3553; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3554; EG-NEXT:     AND_INT T62.Z, T50.W, literal.x,
3555; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3556; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
3557; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
3558; EG-NEXT:     LSHR T62.Y, T50.Z, literal.y,
3559; EG-NEXT:     LSHR T63.W, T49.Y, literal.y,
3560; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.z,
3561; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3562; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3563; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.x,
3564; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3565; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
3566; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
3567; EG-NEXT:     LSHR T63.Y, T49.X, literal.y,
3568; EG-NEXT:     LSHR T65.W, T49.W, literal.y,
3569; EG-NEXT:     AND_INT * T63.X, T49.X, literal.z,
3570; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3571; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3572; EG-NEXT:     AND_INT T65.Z, T49.W, literal.x,
3573; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3574; EG-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
3575; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
3576; EG-NEXT:     LSHR T65.Y, T49.Z, literal.y,
3577; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.z,
3578; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3579; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3580; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3581; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
3582; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
3583; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3584  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
3585  %ext = zext <64 x i16> %load to <64 x i32>
3586  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
3587  ret void
3588}
3589
3590define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
3591; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
3592; GCN-NOHSA-SI:       ; %bb.0:
3593; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x9
3594; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3595; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x0
3596; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x10
3597; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3598; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s1, 16
3599; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s0, 16
3600; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s35, s1
3601; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s38, s0
3602; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s39, s3, 16
3603; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s40, s2, 16
3604; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s41, s3
3605; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s42, s2
3606; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s43, s5, 16
3607; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s44, s4, 16
3608; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
3609; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
3610; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s45, s7, 16
3611; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s46, s6, 16
3612; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
3613; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
3614; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s47, s9, 16
3615; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s48, s8, 16
3616; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
3617; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
3618; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s49, s11, 16
3619; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s50, s10, 16
3620; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
3621; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
3622; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s51, s13, 16
3623; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s52, s12, 16
3624; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
3625; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
3626; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s53, s15, 16
3627; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s54, s14, 16
3628; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
3629; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
3630; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s55, s17, 16
3631; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s56, s16, 16
3632; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s17, s17
3633; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s16, s16
3634; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s57, s19, 16
3635; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s58, s18, 16
3636; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s19, s19
3637; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s18, s18
3638; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s59, s21, 16
3639; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s60, s20, 16
3640; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s21
3641; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s20
3642; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s61, s22, 16
3643; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s62, s23
3644; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s22, s22
3645; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s25, 16
3646; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s64, s24, 16
3647; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s25
3648; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s24
3649; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s65, s27, 16
3650; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s66, s26, 16
3651; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s27, s27
3652; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s26, s26
3653; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s67, s29, 16
3654; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s68, s28, 16
3655; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s29, s29
3656; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s28, s28
3657; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s69, s31, 16
3658; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s70, s30, 16
3659; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s31, s31
3660; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s30, s30
3661; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s23, 16
3662; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
3663; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
3664; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3665; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3666; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
3667; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
3668; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
3669; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
3670; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s28
3671; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
3672; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s29
3673; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
3674; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s26
3675; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
3676; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s27
3677; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
3678; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s24
3679; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
3680; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s25
3681; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
3682; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s22
3683; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s61
3684; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s62
3685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s20
3686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s23
3687; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s60
3688; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s21
3689; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s59
3690; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3691; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3692; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3693; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3694; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3695; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
3696; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
3697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
3698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s58
3699; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
3700; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s57
3701; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3702; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
3704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
3705; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
3706; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
3707; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3708; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s54
3711; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s53
3713; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3714; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3715; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
3717; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3718; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
3719; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3720; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3721; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3722; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
3723; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3724; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
3725; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3726; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3727; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3728; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
3729; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3730; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
3731; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3732; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3733; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3734; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
3735; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3736; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
3737; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3738; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3739; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s44
3741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s43
3743; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3744; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3745; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s42
3746; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s40
3747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s41
3748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
3749; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3750; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3751; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
3752; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
3753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
3754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
3755; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3756; GCN-NOHSA-SI-NEXT:    s_endpgm
3757;
3758; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
3759; GCN-HSA:       ; %bb.0:
3760; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3761; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3762; GCN-HSA-NEXT:    s_load_dwordx16 s[16:31], s[2:3], 0x0
3763; GCN-HSA-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x10
3764; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3765; GCN-HSA-NEXT:    s_ashr_i32 s4, s17, 16
3766; GCN-HSA-NEXT:    s_ashr_i32 s5, s16, 16
3767; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s17
3768; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s16
3769; GCN-HSA-NEXT:    s_ashr_i32 s8, s19, 16
3770; GCN-HSA-NEXT:    s_ashr_i32 s9, s18, 16
3771; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s19
3772; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s18
3773; GCN-HSA-NEXT:    s_ashr_i32 s12, s21, 16
3774; GCN-HSA-NEXT:    s_ashr_i32 s13, s20, 16
3775; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s21
3776; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s20
3777; GCN-HSA-NEXT:    s_ashr_i32 s16, s23, 16
3778; GCN-HSA-NEXT:    s_ashr_i32 s17, s22, 16
3779; GCN-HSA-NEXT:    s_sext_i32_i16 s18, s23
3780; GCN-HSA-NEXT:    s_sext_i32_i16 s19, s22
3781; GCN-HSA-NEXT:    s_ashr_i32 s20, s25, 16
3782; GCN-HSA-NEXT:    s_ashr_i32 s21, s24, 16
3783; GCN-HSA-NEXT:    s_sext_i32_i16 s22, s25
3784; GCN-HSA-NEXT:    s_sext_i32_i16 s23, s24
3785; GCN-HSA-NEXT:    s_ashr_i32 s24, s27, 16
3786; GCN-HSA-NEXT:    s_ashr_i32 s25, s26, 16
3787; GCN-HSA-NEXT:    s_ashr_i32 s33, s29, 16
3788; GCN-HSA-NEXT:    s_ashr_i32 s34, s28, 16
3789; GCN-HSA-NEXT:    s_ashr_i32 s35, s31, 16
3790; GCN-HSA-NEXT:    s_ashr_i32 s52, s30, 16
3791; GCN-HSA-NEXT:    s_ashr_i32 s53, s37, 16
3792; GCN-HSA-NEXT:    s_ashr_i32 s54, s36, 16
3793; GCN-HSA-NEXT:    s_ashr_i32 s55, s39, 16
3794; GCN-HSA-NEXT:    s_ashr_i32 s56, s38, 16
3795; GCN-HSA-NEXT:    s_ashr_i32 s57, s41, 16
3796; GCN-HSA-NEXT:    s_ashr_i32 s58, s40, 16
3797; GCN-HSA-NEXT:    s_ashr_i32 s59, s43, 16
3798; GCN-HSA-NEXT:    s_ashr_i32 s60, s42, 16
3799; GCN-HSA-NEXT:    s_ashr_i32 s61, s45, 16
3800; GCN-HSA-NEXT:    s_ashr_i32 s62, s44, 16
3801; GCN-HSA-NEXT:    s_ashr_i32 s63, s47, 16
3802; GCN-HSA-NEXT:    s_ashr_i32 s64, s46, 16
3803; GCN-HSA-NEXT:    s_ashr_i32 s65, s49, 16
3804; GCN-HSA-NEXT:    s_ashr_i32 s66, s48, 16
3805; GCN-HSA-NEXT:    s_ashr_i32 s67, s51, 16
3806; GCN-HSA-NEXT:    s_ashr_i32 s68, s50, 16
3807; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
3808; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3809; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
3810; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
3811; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3812; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3813; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3814; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3815; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
3816; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3817; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s3
3818; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s2
3819; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
3820; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3821; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s3
3822; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s2
3823; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
3824; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3825; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s3
3826; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s2
3827; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
3828; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3829; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
3830; GCN-HSA-NEXT:    s_sext_i32_i16 s49, s49
3831; GCN-HSA-NEXT:    s_sext_i32_i16 s48, s48
3832; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
3833; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3834; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s48
3835; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s66
3836; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s49
3837; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s65
3838; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3839; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
3840; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3841; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3842; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3843; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3844; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s3
3845; GCN-HSA-NEXT:    s_sext_i32_i16 s43, s43
3846; GCN-HSA-NEXT:    s_sext_i32_i16 s42, s42
3847; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s2
3848; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3849; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s42
3850; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s60
3851; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s43
3852; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s59
3853; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3854; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
3855; GCN-HSA-NEXT:    s_sext_i32_i16 s51, s51
3856; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3857; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3858; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3859; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3860; GCN-HSA-NEXT:    s_sext_i32_i16 s50, s50
3861; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3862; GCN-HSA-NEXT:    s_sext_i32_i16 s36, s36
3863; GCN-HSA-NEXT:    s_sext_i32_i16 s39, s39
3864; GCN-HSA-NEXT:    s_sext_i32_i16 s38, s38
3865; GCN-HSA-NEXT:    s_sext_i32_i16 s41, s41
3866; GCN-HSA-NEXT:    s_sext_i32_i16 s40, s40
3867; GCN-HSA-NEXT:    s_sext_i32_i16 s45, s45
3868; GCN-HSA-NEXT:    s_sext_i32_i16 s44, s44
3869; GCN-HSA-NEXT:    s_sext_i32_i16 s47, s47
3870; GCN-HSA-NEXT:    s_sext_i32_i16 s46, s46
3871; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s50
3872; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s68
3873; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s51
3874; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s67
3875; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3876; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3877; GCN-HSA-NEXT:    s_sext_i32_i16 s29, s29
3878; GCN-HSA-NEXT:    s_sext_i32_i16 s28, s28
3879; GCN-HSA-NEXT:    s_sext_i32_i16 s31, s31
3880; GCN-HSA-NEXT:    s_sext_i32_i16 s30, s30
3881; GCN-HSA-NEXT:    s_sext_i32_i16 s37, s37
3882; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s46
3883; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s64
3884; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s47
3885; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s63
3886; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s44
3887; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s62
3888; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s45
3889; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s61
3890; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s40
3891; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[0:3]
3892; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s58
3893; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s38
3894; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s41
3895; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
3896; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s56
3897; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s39
3898; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s36
3899; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s55
3900; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s54
3901; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3902; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
3903; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s37
3904; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s30
3905; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
3906; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s53
3907; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s28
3908; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s52
3909; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s31
3910; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s35
3911; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s34
3912; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
3913; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s29
3914; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s33
3915; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3916; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
3917; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3918; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
3919; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3920; GCN-HSA-NEXT:    s_sext_i32_i16 s27, s27
3921; GCN-HSA-NEXT:    s_sext_i32_i16 s26, s26
3922; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3923; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3924; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
3925; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3926; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
3927; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3928; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3929; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3930; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3931; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3932; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
3933; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
3934; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3935; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
3936; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3937; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3938; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3939; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3940; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3941; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3942; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s19
3943; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
3944; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
3945; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
3946; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3947; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3948; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3949; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3950; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3951; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s15
3952; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
3953; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
3954; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
3955; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3956; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3957; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3958; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
3959; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
3960; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
3961; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
3962; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3963; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3964; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3965; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
3966; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
3967; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
3968; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s4
3969; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3970; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3971; GCN-HSA-NEXT:    s_endpgm
3972;
3973; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32:
3974; GCN-NOHSA-VI:       ; %bb.0:
3975; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[0:1], 0x24
3976; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3977; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
3978; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x40
3979; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3980; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s49, s31, 16
3981; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s69, s15, 16
3982; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s70, s14, 16
3983; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
3984; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
3985; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s51, s1, 16
3986; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s52, s0, 16
3987; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s53, s1
3988; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s54, s0
3989; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s55, s3, 16
3990; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s56, s2, 16
3991; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s57, s3
3992; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s58, s2
3993; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s67, s13, 16
3994; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s68, s12, 16
3995; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
3996; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
3997; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3998; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3999; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s36
4000; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s37
4001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
4002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s70
4003; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
4004; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s69
4005; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s65, s11, 16
4006; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s66, s10, 16
4007; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
4008; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
4009; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
4010; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s63, s9, 16
4011; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
4012; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s68
4013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
4014; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s67
4015; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s64, s8, 16
4016; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
4017; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
4018; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4019; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s61, s7, 16
4020; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
4021; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
4022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
4023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
4024; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s62, s6, 16
4025; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
4026; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
4027; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
4028; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s59, s5, 16
4029; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
4030; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
4031; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
4032; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
4033; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s60, s4, 16
4034; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
4035; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
4036; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
4037; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s50, s30, 16
4038; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4039; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s62
4040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
4041; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s61
4042; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
4043; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s31, s31
4044; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
4045; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s60
4046; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
4047; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
4048; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
4049; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s30, s30
4050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s58
4051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
4052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s57
4053; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
4054; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
4055; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s47, s29, 16
4056; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
4057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
4058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s53
4059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
4060; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s48, s28, 16
4061; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s29, s29
4062; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s28, s28
4063; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
4064; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s45, s27, 16
4065; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
4066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s50
4067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
4068; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
4069; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s46, s26, 16
4070; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s27, s27
4071; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s26, s26
4072; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4073; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s43, s25, 16
4074; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
4075; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s48
4076; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
4077; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
4078; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s44, s24, 16
4079; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s25, s25
4080; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s24, s24
4081; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
4082; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s41, s23, 16
4083; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
4084; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
4085; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
4086; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
4087; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s42, s22, 16
4088; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s23, s23
4089; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s22, s22
4090; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4091; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s39, s21, 16
4092; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
4093; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
4094; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
4095; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
4096; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s40, s20, 16
4097; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s21, s21
4098; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s20, s20
4099; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
4100; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s19, 16
4101; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
4102; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
4103; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
4104; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
4105; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s38, s18, 16
4106; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s19, s19
4107; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s18, s18
4108; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
4109; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s17, 16
4110; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
4111; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
4112; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
4113; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
4114; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s16, 16
4115; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s17, s17
4116; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s16, s16
4117; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
4118; GCN-NOHSA-VI-NEXT:    s_nop 0
4119; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
4120; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s38
4121; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
4122; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
4123; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4124; GCN-NOHSA-VI-NEXT:    s_nop 0
4125; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
4126; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
4127; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
4128; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
4129; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4130; GCN-NOHSA-VI-NEXT:    s_endpgm
4131;
4132; EG-LABEL: constant_sextload_v64i16_to_v64i32:
4133; EG:       ; %bb.0:
4134; EG-NEXT:    ALU 17, @38, KC0[CB0:0-32], KC1[]
4135; EG-NEXT:    TEX 7 @22
4136; EG-NEXT:    ALU 75, @56, KC0[CB0:0-32], KC1[]
4137; EG-NEXT:    ALU 71, @132, KC0[CB0:0-32], KC1[]
4138; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0
4139; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0
4140; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0
4141; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0
4142; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0
4143; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0
4144; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0
4145; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0
4146; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0
4147; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0
4148; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0
4149; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0
4150; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0
4151; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4152; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4153; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4154; EG-NEXT:    CF_END
4155; EG-NEXT:    PAD
4156; EG-NEXT:    Fetch clause starting at 22:
4157; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
4158; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
4159; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
4160; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
4161; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
4162; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
4163; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
4164; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
4165; EG-NEXT:    ALU clause starting at 38:
4166; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
4167; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4168; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4169; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
4170; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4171; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4172; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
4173; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4174; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
4175; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
4176; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4177; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4178; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4179; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4180; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4181; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4182; EG-NEXT:     MOV * T41.X, KC0[2].Z,
4183; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4184; EG-NEXT:    ALU clause starting at 56:
4185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4186; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
4187; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4188; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4189; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
4190; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4191; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4192; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
4193; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4194; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4195; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
4196; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4197; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4198; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
4199; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4200; EG-NEXT:     LSHR T0.Y, T41.W, literal.y,
4201; EG-NEXT:     LSHR T0.Z, T41.Y, literal.y,
4202; EG-NEXT:     LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212
4203; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4204; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4205; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
4206; EG-NEXT:     LSHR T54.X, PS, literal.x,
4207; EG-NEXT:     LSHR T1.Y, T48.Y, literal.y,
4208; EG-NEXT:     LSHR T1.Z, T47.W, literal.y,
4209; EG-NEXT:     LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212
4210; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
4211; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4212; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4213; EG-NEXT:     LSHR T55.X, PS, literal.x,
4214; EG-NEXT:     LSHR T2.Y, T46.W, literal.y,
4215; EG-NEXT:     LSHR T2.Z, T46.Y, literal.y,
4216; EG-NEXT:     LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212
4217; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
4218; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4219; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
4220; EG-NEXT:     LSHR T56.X, PS, literal.x,
4221; EG-NEXT:     LSHR T3.Y, T45.Y, literal.y,
4222; EG-NEXT:     BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212
4223; EG-NEXT:     LSHR T3.W, T43.W, literal.y,
4224; EG-NEXT:     LSHR * T4.W, T43.Y, literal.y,
4225; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4226; EG-NEXT:     BFE_INT T57.X, T44.X, 0.0, literal.x,
4227; EG-NEXT:     LSHR T4.Y, T42.W, literal.x,
4228; EG-NEXT:     BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212
4229; EG-NEXT:     LSHR T5.W, T42.Y, literal.x,
4230; EG-NEXT:     LSHR * T6.W, T44.Y, literal.x,
4231; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4232; EG-NEXT:     BFE_INT T58.X, T44.Z, 0.0, literal.x,
4233; EG-NEXT:     LSHR T5.Y, T44.W, literal.x,
4234; EG-NEXT:     BFE_INT T59.Z, T42.Y, 0.0, literal.x,
4235; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
4236; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
4237; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4238; EG-NEXT:     BFE_INT T59.X, T42.X, 0.0, literal.x,
4239; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
4240; EG-NEXT:     BFE_INT T60.Z, T42.W, 0.0, literal.x,
4241; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
4242; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
4243; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4244; EG-NEXT:     BFE_INT T60.X, T42.Z, 0.0, literal.x,
4245; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
4246; EG-NEXT:     BFE_INT T44.Z, T43.Y, 0.0, literal.x,
4247; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x,
4248; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
4249; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4250; EG-NEXT:     BFE_INT T44.X, T43.X, 0.0, literal.x,
4251; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
4252; EG-NEXT:     BFE_INT T61.Z, T43.W, 0.0, literal.x,
4253; EG-NEXT:     BFE_INT T60.W, T4.Y, 0.0, literal.x,
4254; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
4255; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4256; EG-NEXT:     BFE_INT T61.X, T43.Z, 0.0, literal.x,
4257; EG-NEXT:     BFE_INT T60.Y, PS, 0.0, literal.x,
4258; EG-NEXT:     BFE_INT T42.Z, T45.Y, 0.0, literal.x,
4259; EG-NEXT:     BFE_INT * T44.W, T4.W, 0.0, literal.x,
4260; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4261; EG-NEXT:    ALU clause starting at 132:
4262; EG-NEXT:     LSHR * T4.W, T43.X, literal.x,
4263; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4264; EG-NEXT:     BFE_INT T42.X, T45.X, 0.0, literal.x,
4265; EG-NEXT:     BFE_INT T44.Y, PV.W, 0.0, literal.x,
4266; EG-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
4267; EG-NEXT:     BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
4268; EG-NEXT:     LSHR * T3.W, T43.Z, literal.x,
4269; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4270; EG-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
4271; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
4272; EG-NEXT:     BFE_INT T43.Z, T46.Y, 0.0, literal.x,
4273; EG-NEXT:     BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4274; EG-NEXT:     LSHR * T3.W, T45.X, literal.x,
4275; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4276; EG-NEXT:     BFE_INT T43.X, T46.X, 0.0, literal.x,
4277; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4278; EG-NEXT:     BFE_INT T63.Z, T46.W, 0.0, literal.x,
4279; EG-NEXT:     BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
4280; EG-NEXT:     LSHR * T2.W, T45.Z, literal.x,
4281; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4282; EG-NEXT:     BFE_INT T63.X, T46.Z, 0.0, literal.x,
4283; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
4284; EG-NEXT:     BFE_INT T45.Z, T47.Y, 0.0, literal.x,
4285; EG-NEXT:     BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4286; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
4287; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4288; EG-NEXT:     BFE_INT T45.X, T47.X, 0.0, literal.x,
4289; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4290; EG-NEXT:     BFE_INT T64.Z, T47.W, 0.0, literal.x,
4291; EG-NEXT:     BFE_INT T63.W, T2.Y, 0.0, literal.x,
4292; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
4293; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4294; EG-NEXT:     BFE_INT T64.X, T47.Z, 0.0, literal.x,
4295; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
4296; EG-NEXT:     BFE_INT T46.Z, T48.Y, 0.0, literal.x,
4297; EG-NEXT:     BFE_INT T45.W, T1.W, 0.0, literal.x,
4298; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
4299; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4300; EG-NEXT:     BFE_INT T46.X, T48.X, 0.0, literal.x,
4301; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4302; EG-NEXT:     BFE_INT T65.Z, T48.W, 0.0, literal.x,
4303; EG-NEXT:     BFE_INT T64.W, T1.Z, 0.0, literal.x,
4304; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
4305; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4306; EG-NEXT:     BFE_INT T65.X, T48.Z, 0.0, literal.x,
4307; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
4308; EG-NEXT:     BFE_INT T47.Z, T41.Y, 0.0, literal.x,
4309; EG-NEXT:     BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4310; EG-NEXT:     LSHR * T1.W, T48.X, literal.x,
4311; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4312; EG-NEXT:     BFE_INT T47.X, T41.X, 0.0, literal.x,
4313; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4314; EG-NEXT:     BFE_INT T66.Z, T41.W, 0.0, literal.x,
4315; EG-NEXT:     BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
4316; EG-NEXT:     LSHR * T0.W, T48.Z, literal.x,
4317; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4318; EG-NEXT:     BFE_INT T66.X, T41.Z, 0.0, literal.x,
4319; EG-NEXT:     BFE_INT T65.Y, PS, 0.0, literal.x,
4320; EG-NEXT:     LSHR T1.Z, T41.X, literal.x,
4321; EG-NEXT:     BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4322; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4323; EG-NEXT:    16(2.242078e-44), 224(3.138909e-43)
4324; EG-NEXT:     LSHR T41.X, PS, literal.x,
4325; EG-NEXT:     BFE_INT T47.Y, PV.Z, 0.0, literal.y,
4326; EG-NEXT:     LSHR T0.Z, T41.Z, literal.y,
4327; EG-NEXT:     BFE_INT T66.W, T0.Y, 0.0, literal.y,
4328; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4329; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4330; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4331; EG-NEXT:     LSHR T48.X, PS, literal.x,
4332; EG-NEXT:     BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
4333; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4334  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
4335  %ext = sext <64 x i16> %load to <64 x i32>
4336  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
4337  ret void
4338}
4339
; Test input: load a single i16 through the addrspace(4) pointer %in,
; zero-extend it to i64 and store the result through the addrspace(1)
; pointer %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
; What the checked output shows for each run line:
;  - SI and HSA runs: an unsigned 16-bit load, the high dword materialized as
;    constant 0, then a 64-bit store.
;  - VI run: the same sequence plus an explicit v_and_b32 with 0xffff on the
;    loaded value before the store.
;  - r600 run: a VTX_READ_16 into T0.X, T0.Y set to 0.0, and T0.XY stored.
4340define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4341; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
4342; GCN-NOHSA-SI:       ; %bb.0:
4343; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4344; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4346; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4347; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4348; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4349; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4350; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4351; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4352; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4353; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4354; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4355; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4356; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4357; GCN-NOHSA-SI-NEXT:    s_endpgm
4358;
4359; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
4360; GCN-HSA:       ; %bb.0:
4361; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4362; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4363; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4364; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4365; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
4366; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4367; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4368; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4369; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4370; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4371; GCN-HSA-NEXT:    s_endpgm
4372;
4373; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
4374; GCN-NOHSA-VI:       ; %bb.0:
4375; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4376; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4377; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4378; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4379; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4380; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4381; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4382; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4383; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4384; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4385; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4387; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4388; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4389; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4390; GCN-NOHSA-VI-NEXT:    s_endpgm
4391;
4392; EG-LABEL: constant_zextload_i16_to_i64:
4393; EG:       ; %bb.0:
4394; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4395; EG-NEXT:    TEX 0 @6
4396; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4397; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4398; EG-NEXT:    CF_END
4399; EG-NEXT:    PAD
4400; EG-NEXT:    Fetch clause starting at 6:
4401; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4402; EG-NEXT:    ALU clause starting at 8:
4403; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4404; EG-NEXT:    ALU clause starting at 9:
4405; EG-NEXT:     MOV * T0.Y, 0.0,
4406; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4407; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4408  %a = load i16, i16 addrspace(4)* %in
4409  %ext = zext i16 %a to i64
4410  store i64 %ext, i64 addrspace(1)* %out
4411  ret void
4412}
4413
4414; FIXME: Need to optimize this sequence to avoid extra bfe:
4415;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
4416;          t31: i64 = any_extend t28
4417;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
4418; TODO: These could be expanded earlier using ASHR 15
; Test input: load a single i16 through the addrspace(4) pointer %in,
; sign-extend it to i64 and store the result through the addrspace(1)
; pointer %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
; What the checked output shows for each run line:
;  - SI and HSA runs: a signed 16-bit load (sshort), with the high dword
;    produced by an arithmetic shift right by 31 of the loaded value.
;  - VI run: an unsigned 16-bit load followed by v_bfe_i32 (offset 0,
;    width 16) and then the same shift by 31. NOTE(review): this v_bfe_i32
;    is presumably the "extra bfe" the FIXME before this function refers
;    to; confirm before relying on it.
;  - r600 run: VTX_READ_16, then BFE_INT with literal 16 for the low half
;    and ASHR by 31 for the high half.
4419define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
4420; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
4421; GCN-NOHSA-SI:       ; %bb.0:
4422; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4423; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4424; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4425; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4426; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4427; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4428; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4429; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4430; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4431; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4432; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4433; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4434; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4435; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4436; GCN-NOHSA-SI-NEXT:    s_endpgm
4437;
4438; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
4439; GCN-HSA:       ; %bb.0:
4440; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4441; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4442; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4443; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4444; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
4445; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4446; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4447; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4448; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4449; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4450; GCN-HSA-NEXT:    s_endpgm
4451;
4452; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
4453; GCN-NOHSA-VI:       ; %bb.0:
4454; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4455; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4456; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4457; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4458; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4459; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4460; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4461; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4462; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4463; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4464; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4465; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4466; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4467; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4468; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4469; GCN-NOHSA-VI-NEXT:    s_endpgm
4470;
4471; EG-LABEL: constant_sextload_i16_to_i64:
4472; EG:       ; %bb.0:
4473; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4474; EG-NEXT:    TEX 0 @6
4475; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4476; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4477; EG-NEXT:    CF_END
4478; EG-NEXT:    PAD
4479; EG-NEXT:    Fetch clause starting at 6:
4480; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4481; EG-NEXT:    ALU clause starting at 8:
4482; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4483; EG-NEXT:    ALU clause starting at 9:
4484; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4485; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4486; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4487; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4488; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4489  %a = load i16, i16 addrspace(4)* %in
4490  %ext = sext i16 %a to i64
4491  store i64 %ext, i64 addrspace(1)* %out
4492  ret void
4493}
4494
; Test input: single-element vector form of the i16 -> i64 zero-extending
; load. Loads a <1 x i16> through the addrspace(4) pointer %in, zero-extends
; it to <1 x i64> and stores the result through the addrspace(1) pointer
; %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
; The checked instruction sequences match those of the scalar test
; constant_zextload_i16_to_i64 for all four run lines: an unsigned 16-bit
; load with a zero high half, plus a v_and_b32 with 0xffff on the VI run only.
4495define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4496; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
4497; GCN-NOHSA-SI:       ; %bb.0:
4498; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4499; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4500; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4501; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4502; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4503; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4504; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4505; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4506; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4507; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4508; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4509; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4510; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4511; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4512; GCN-NOHSA-SI-NEXT:    s_endpgm
4513;
4514; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
4515; GCN-HSA:       ; %bb.0:
4516; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4517; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4518; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4519; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4520; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
4521; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4522; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4523; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4524; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4525; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4526; GCN-HSA-NEXT:    s_endpgm
4527;
4528; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
4529; GCN-NOHSA-VI:       ; %bb.0:
4530; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4531; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4532; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4533; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4534; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4535; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4536; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4537; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4538; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4539; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4540; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4541; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4542; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4543; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4544; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4545; GCN-NOHSA-VI-NEXT:    s_endpgm
4546;
4547; EG-LABEL: constant_zextload_v1i16_to_v1i64:
4548; EG:       ; %bb.0:
4549; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4550; EG-NEXT:    TEX 0 @6
4551; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4552; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4553; EG-NEXT:    CF_END
4554; EG-NEXT:    PAD
4555; EG-NEXT:    Fetch clause starting at 6:
4556; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4557; EG-NEXT:    ALU clause starting at 8:
4558; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4559; EG-NEXT:    ALU clause starting at 9:
4560; EG-NEXT:     MOV * T0.Y, 0.0,
4561; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4562; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4563  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4564  %ext = zext <1 x i16> %load to <1 x i64>
4565  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4566  ret void
4567}
4568
; Test kernel: sign-extending load of a one-element i16 vector to i64.
; Loads <1 x i16> through the addrspace(4) pointer %in, sign-extends it to
; <1 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded output the SI and HSA runs load with buffer_load_sshort /
; flat_load_sshort, while the VI run loads with buffer_load_ushort and then
; applies v_bfe_i32; all three build the high dword with v_ashrrev_i32 by 31.
; The R600 run uses BFE_INT followed by ASHR with the literal 31.
4569define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
4570; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
4571; GCN-NOHSA-SI:       ; %bb.0:
4572; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4573; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
4574; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
4575; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
4576; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
4577; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4578; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
4579; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
4580; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
4581; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
4582; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
4583; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4584; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4585; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4586; GCN-NOHSA-SI-NEXT:    s_endpgm
4587;
4588; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64:
4589; GCN-HSA:       ; %bb.0:
4590; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4591; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4592; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4593; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4594; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
4595; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4596; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4597; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
4598; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4599; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4600; GCN-HSA-NEXT:    s_endpgm
4601;
4602; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64:
4603; GCN-NOHSA-VI:       ; %bb.0:
4604; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4605; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4606; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4607; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
4608; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
4609; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4610; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
4611; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
4612; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
4613; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4614; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4615; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4616; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
4617; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4618; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4619; GCN-NOHSA-VI-NEXT:    s_endpgm
4620;
4621; EG-LABEL: constant_sextload_v1i16_to_v1i64:
4622; EG:       ; %bb.0:
4623; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4624; EG-NEXT:    TEX 0 @6
4625; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
4626; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4627; EG-NEXT:    CF_END
4628; EG-NEXT:    PAD
4629; EG-NEXT:    Fetch clause starting at 6:
4630; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
4631; EG-NEXT:    ALU clause starting at 8:
4632; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4633; EG-NEXT:    ALU clause starting at 9:
4634; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
4635; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
4636; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4637; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
4638; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
  ; IR under test: load from %in, sext the single lane to i64, store to %out.
4639  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
4640  %ext = sext <1 x i16> %load to <1 x i64>
4641  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
4642  ret void
4643}
4644
; Test kernel: zero-extending load of a two-element i16 vector to i64.
; Loads <2 x i16> through the addrspace(4) pointer %in, zero-extends it to
; <2 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded GCN output the vector is fetched with one s_load_dword and
; split with s_lshr_b32 by 16 and s_and_b32 with 0xffff; the high dword of
; each i64 comes from v1 (set to 0) and its copy v3, and the result is
; written with a single dwordx4 store.
4645define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4646; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
4647; GCN-NOHSA-SI:       ; %bb.0:
4648; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4649; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4650; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4651; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4652; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4653; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4654; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4655; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
4656; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4657; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4658; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
4659; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4660; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4661; GCN-NOHSA-SI-NEXT:    s_endpgm
4662;
4663; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64:
4664; GCN-HSA:       ; %bb.0:
4665; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4666; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4667; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4668; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4669; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
4670; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4671; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4672; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4673; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
4674; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
4675; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s1
4676; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4677; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4678; GCN-HSA-NEXT:    s_endpgm
4679;
4680; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64:
4681; GCN-NOHSA-VI:       ; %bb.0:
4682; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4683; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4684; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4685; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4686; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4687; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4688; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
4689; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4690; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4691; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4692; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
4693; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
4694; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
4695; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
4696; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4697; GCN-NOHSA-VI-NEXT:    s_endpgm
4698;
4699; EG-LABEL: constant_zextload_v2i16_to_v2i64:
4700; EG:       ; %bb.0:
4701; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4702; EG-NEXT:    TEX 0 @6
4703; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
4704; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4705; EG-NEXT:    CF_END
4706; EG-NEXT:    PAD
4707; EG-NEXT:    Fetch clause starting at 6:
4708; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4709; EG-NEXT:    ALU clause starting at 8:
4710; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4711; EG-NEXT:    ALU clause starting at 9:
4712; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
4713; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4714; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
4715; EG-NEXT:     MOV T4.Y, 0.0,
4716; EG-NEXT:     MOV T4.W, 0.0,
4717; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4718; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
  ; IR under test: load from %in, zext both lanes to i64, store to %out.
4719  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4720  %ext = zext <2 x i16> %load to <2 x i64>
4721  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4722  ret void
4723}
4724
; Test kernel: sign-extending load of a two-element i16 vector to i64.
; Loads <2 x i16> through the addrspace(4) pointer %in, sign-extends it to
; <2 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded GCN output the vector is fetched with one s_load_dword;
; s_bfe_i64 with operand 0x100000 is applied once to the loaded dword and
; once to a copy shifted right by 16 (s_lshr_b32), and the two 64-bit
; results are written with a single dwordx4 store.
4725define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
4726; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
4727; GCN-NOHSA-SI:       ; %bb.0:
4728; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4729; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4730; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
4731; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4732; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4733; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
4734; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[2:3], 0x100000
4735; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4736; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4737; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4738; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4739; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4741; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4742; GCN-NOHSA-SI-NEXT:    s_endpgm
4743;
4744; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64:
4745; GCN-HSA:       ; %bb.0:
4746; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4747; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4748; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
4749; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4750; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4751; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4752; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
4753; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
4754; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
4755; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4756; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4757; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
4758; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
4759; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4760; GCN-HSA-NEXT:    s_endpgm
4761;
4762; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64:
4763; GCN-NOHSA-VI:       ; %bb.0:
4764; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4765; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4766; GCN-NOHSA-VI-NEXT:    s_load_dword s4, s[2:3], 0x0
4767; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4768; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4769; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4770; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
4771; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4772; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4773; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4774; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
4775; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
4776; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
4777; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4778; GCN-NOHSA-VI-NEXT:    s_endpgm
4779;
4780; EG-LABEL: constant_sextload_v2i16_to_v2i64:
4781; EG:       ; %bb.0:
4782; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4783; EG-NEXT:    TEX 0 @6
4784; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
4785; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
4786; EG-NEXT:    CF_END
4787; EG-NEXT:    PAD
4788; EG-NEXT:    Fetch clause starting at 6:
4789; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
4790; EG-NEXT:    ALU clause starting at 8:
4791; EG-NEXT:     MOV * T4.X, KC0[2].Z,
4792; EG-NEXT:    ALU clause starting at 9:
4793; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
4794; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4795; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
4796; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4797; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
4798; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
4799; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
4800; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
4801; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
  ; IR under test: load from %in, sext both lanes to i64, store to %out.
4802  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
4803  %ext = sext <2 x i16> %load to <2 x i64>
4804  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
4805  ret void
4806}
4807
; Test kernel: zero-extending load of a four-element i16 vector to i64.
; Loads <4 x i16> through the addrspace(4) pointer %in, zero-extends it to
; <4 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded GCN output the vector is fetched with one s_load_dwordx2,
; each dword is split with s_lshr_b32 by 16 and s_and_b32 with 0xffff, and
; the result is written with two dwordx4 stores (byte offset 16, then 0).
; The HSA run forms the +16 address with s_add_u32 / s_addc_u32 instead of
; an offset operand on the store.
4808define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4809; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
4810; GCN-NOHSA-SI:       ; %bb.0:
4811; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4812; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4813; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
4814; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4815; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
4816; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4817; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
4818; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4819; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
4820; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
4821; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
4822; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
4823; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
4824; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
4825; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4826; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4827; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
4828; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
4829; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4830; GCN-NOHSA-SI-NEXT:    s_endpgm
4831;
4832; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64:
4833; GCN-HSA:       ; %bb.0:
4834; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4835; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
4836; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
4837; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4838; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4839; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4840; GCN-HSA-NEXT:    s_lshr_b32 s4, s3, 16
4841; GCN-HSA-NEXT:    s_lshr_b32 s5, s2, 16
4842; GCN-HSA-NEXT:    s_and_b32 s6, s2, 0xffff
4843; GCN-HSA-NEXT:    s_and_b32 s2, s3, 0xffff
4844; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4845; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
4846; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4847; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4848; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
4849; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4850; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4851; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4852; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
4853; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
4854; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4855; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4856; GCN-HSA-NEXT:    s_endpgm
4857;
4858; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64:
4859; GCN-NOHSA-VI:       ; %bb.0:
4860; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4861; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
4862; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
4863; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
4864; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
4865; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4866; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4867; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
4868; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
4869; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4870; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
4871; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
4872; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s3, 16
4873; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
4874; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
4875; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
4876; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4877; GCN-NOHSA-VI-NEXT:    s_nop 0
4878; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
4879; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
4880; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4881; GCN-NOHSA-VI-NEXT:    s_endpgm
4882;
4883; EG-LABEL: constant_zextload_v4i16_to_v4i64:
4884; EG:       ; %bb.0:
4885; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4886; EG-NEXT:    TEX 0 @6
4887; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
4888; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
4889; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
4890; EG-NEXT:    CF_END
4891; EG-NEXT:    Fetch clause starting at 6:
4892; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
4893; EG-NEXT:    ALU clause starting at 8:
4894; EG-NEXT:     MOV * T5.X, KC0[2].Z,
4895; EG-NEXT:    ALU clause starting at 9:
4896; EG-NEXT:     MOV T2.X, T5.X,
4897; EG-NEXT:     MOV * T3.X, T5.Y,
4898; EG-NEXT:     MOV T0.Y, PV.X,
4899; EG-NEXT:     MOV * T0.Z, PS,
4900; EG-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
4901; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4902; EG-NEXT:     AND_INT T5.X, T0.Z, literal.x,
4903; EG-NEXT:     MOV T5.Y, 0.0,
4904; EG-NEXT:     LSHR T6.Z, T0.Y, literal.y,
4905; EG-NEXT:     AND_INT * T6.X, T0.Y, literal.x,
4906; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4907; EG-NEXT:     MOV T6.Y, 0.0,
4908; EG-NEXT:     MOV T5.W, 0.0,
4909; EG-NEXT:     MOV * T6.W, 0.0,
4910; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
4911; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4912; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4913; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
4914; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: load from %in, zext all four lanes to i64, store to %out.
4915  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
4916  %ext = zext <4 x i16> %load to <4 x i64>
4917  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
4918  ret void
4919}
4920
; Test kernel: sign-extending load of a four-element i16 vector to i64.
; Loads <4 x i16> through the addrspace(4) pointer %in, sign-extends it to
; <4 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded GCN output the vector is fetched with one s_load_dwordx2
; and written with two dwordx4 stores (byte offset 16, then 0). The SI and
; HSA runs use three s_bfe_i64 plus one s_ashr_i64 by 48, whereas the VI run
; uses four s_bfe_i64 fed by s_lshr_b32 by 16 for the upper halves.
4921define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
4922; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
4923; GCN-NOHSA-SI:       ; %bb.0:
4924; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
4925; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4926; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
4927; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4928; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4929; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4930; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s5
4931; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s4, 16
4932; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[4:5], 0x100000
4933; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
4934; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
4935; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
4936; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
4937; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
4938; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
4939; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
4940; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4941; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4942; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
4943; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
4944; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
4945; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
4946; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4947; GCN-NOHSA-SI-NEXT:    s_endpgm
4948;
4949; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
4950; GCN-HSA:       ; %bb.0:
4951; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4952; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4953; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
4954; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4955; GCN-HSA-NEXT:    s_mov_b32 s4, s3
4956; GCN-HSA-NEXT:    s_lshr_b32 s6, s2, 16
4957; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[2:3], 0x100000
4958; GCN-HSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 48
4959; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
4960; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4961; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
4962; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
4963; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
4964; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4965; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4966; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4967; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4968; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4969; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4970; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4971; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
4972; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
4973; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
4974; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
4975; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4976; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4977; GCN-HSA-NEXT:    s_endpgm
4978;
4979; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
4980; GCN-NOHSA-VI:       ; %bb.0:
4981; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
4982; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4983; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
4984; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4985; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4986; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4987; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s5
4988; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s5, 16
4989; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[4:5], 0x100000
4990; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
4991; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
4992; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
4993; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
4994; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
4995; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
4996; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
4997; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
4998; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4999; GCN-NOHSA-VI-NEXT:    s_nop 0
5000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
5001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
5002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5003; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5004; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5005; GCN-NOHSA-VI-NEXT:    s_endpgm
5006;
5007; EG-LABEL: constant_sextload_v4i16_to_v4i64:
5008; EG:       ; %bb.0:
5009; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5010; EG-NEXT:    TEX 0 @6
5011; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5012; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
5013; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5014; EG-NEXT:    CF_END
5015; EG-NEXT:    Fetch clause starting at 6:
5016; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5017; EG-NEXT:    ALU clause starting at 8:
5018; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5019; EG-NEXT:    ALU clause starting at 9:
5020; EG-NEXT:     MOV T2.X, T5.X,
5021; EG-NEXT:     MOV * T3.X, T5.Y,
5022; EG-NEXT:     MOV T0.Y, PS,
5023; EG-NEXT:     MOV * T0.Z, PV.X,
5024; EG-NEXT:     ASHR * T5.W, PV.Z, literal.x,
5025; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5026; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5027; EG-NEXT:     ASHR T5.Z, T0.Z, literal.y,
5028; EG-NEXT:     ASHR * T7.W, T0.Y, literal.z,
5029; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5030; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5031; EG-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
5032; EG-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
5033; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5034; EG-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
5035; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
5036; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5037; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5038; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
5039; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
5040; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
  ; IR under test: load from %in, sext all four lanes to i64, store to %out.
5041  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
5042  %ext = sext <4 x i16> %load to <4 x i64>
5043  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
5044  ret void
5045}
5046
; Test kernel: zero-extending load of an eight-element i16 vector to i64.
; Loads <8 x i16> through the addrspace(4) pointer %in, zero-extends it to
; <8 x i64>, and stores the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
; In the recorded GCN output the vector is fetched with one s_load_dwordx4,
; each dword is split with s_lshr_b32 by 16 and s_and_b32 with 0xffff, and
; the result is written with four dwordx4 stores covering byte offsets 0,
; 16, 32 and 48. The order of those stores differs between runs (48, 16,
; 32, 0 for SI and HSA; 48, 32, 16, 0 for VI).
5047define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5048; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
5049; GCN-NOHSA-SI:       ; %bb.0:
5050; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5051; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5052; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5053; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5054; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5055; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5056; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5057; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5058; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s5, 16
5059; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s7, 16
5060; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s6, 16
5061; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s4, 16
5062; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
5063; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
5064; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
5065; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
5066; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
5067; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
5068; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5069; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5070; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5071; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
5072; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5073; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5074; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5075; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
5076; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5077; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5078; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
5079; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
5080; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5081; GCN-NOHSA-SI-NEXT:    s_endpgm
5082;
5083; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
5084; GCN-HSA:       ; %bb.0:
5085; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5086; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5087; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5088; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5089; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5090; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5091; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
5092; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
5093; GCN-HSA-NEXT:    s_lshr_b32 s9, s6, 16
5094; GCN-HSA-NEXT:    s_lshr_b32 s10, s4, 16
5095; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
5096; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
5097; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
5098; GCN-HSA-NEXT:    s_and_b32 s3, s7, 0xffff
5099; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5100; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5101; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
5102; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5103; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5104; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5105; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5106; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5107; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5108; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5109; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5110; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5111; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5112; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
5113; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5114; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5115; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5116; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5117; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
5118; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5119; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5120; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5121; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5122; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5123; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5124; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5125; GCN-HSA-NEXT:    s_endpgm
5126;
5127; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
5128; GCN-NOHSA-VI:       ; %bb.0:
5129; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5130; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5131; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5132; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5133; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5134; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5135; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5136; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5137; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s7, 0xffff
5138; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
5139; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s6, 0xffff
5140; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5141; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s11
5142; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
5143; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s5, 0xffff
5144; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
5145; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5146; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s4, 0xffff
5147; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
5148; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5149; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5150; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5151; GCN-NOHSA-VI-NEXT:    s_nop 0
5152; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s9
5153; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5154; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5155; GCN-NOHSA-VI-NEXT:    s_nop 0
5156; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5157; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5158; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5159; GCN-NOHSA-VI-NEXT:    s_endpgm
5160;
5161; EG-LABEL: constant_zextload_v8i16_to_v8i64:
5162; EG:       ; %bb.0:
5163; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5164; EG-NEXT:    TEX 0 @8
5165; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
5166; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
5167; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
5168; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
5169; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
5170; EG-NEXT:    CF_END
5171; EG-NEXT:    Fetch clause starting at 8:
5172; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5173; EG-NEXT:    ALU clause starting at 10:
5174; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5175; EG-NEXT:    ALU clause starting at 11:
5176; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
5177; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5178; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
5179; EG-NEXT:     MOV T8.Y, 0.0,
5180; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
5181; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
5182; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5183; EG-NEXT:     MOV T9.Y, 0.0,
5184; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
5185; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5186; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
5187; EG-NEXT:     MOV T10.Y, 0.0,
5188; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
5189; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
5190; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5191; EG-NEXT:     MOV T7.Y, 0.0,
5192; EG-NEXT:     MOV T8.W, 0.0,
5193; EG-NEXT:     MOV * T9.W, 0.0,
5194; EG-NEXT:     MOV T10.W, 0.0,
5195; EG-NEXT:     MOV * T7.W, 0.0,
5196; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
5197; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5198; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5199; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
5200; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5201; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5202; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
5203; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5204; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5205; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
5206; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test: load from %in, zext all eight lanes to i64, store to %out.
5207  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5208  %ext = zext <8 x i16> %load to <8 x i64>
5209  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5210  ret void
5211}
5212
; Test kernel that loads an <8 x i16> vector through the addrspace(4) pointer
; %in, sign-extends every lane to i64 and stores the <8 x i64> result through
; the addrspace(1) pointer %out.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file), one group per check prefix named on the RUN lines.
; Regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand.
; In the expected output the extension shows up as s_bfe_i64 (plus s_ashr_i64
; for the first two GCN groups) and as BFE_INT and ASHR in the r600 group.
; Attribute group #0 is defined outside this part of the file.
; NOTE(review) the constant_ name prefix presumably refers to addrspace(4) -
; confirm against the AMDGPU address space table.
5213define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
5214; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
5215; GCN-NOHSA-SI:       ; %bb.0:
5216; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5217; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5218; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5219; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5220; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5221; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5222; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s7
5223; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s5
5224; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
5225; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s4, 16
5226; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[4:5], 0x100000
5227; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5228; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5229; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5230; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5231; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5232; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5233; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5234; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5235; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
5236; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5237; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5238; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5239; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5240; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5241; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
5242; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5243; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5244; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5245; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5246; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5247; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5248; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
5249; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s17
5250; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5251; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
5252; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5253; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s14
5254; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s15
5255; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
5256; GCN-NOHSA-SI-NEXT:    s_endpgm
5257;
5258; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
5259; GCN-HSA:       ; %bb.0:
5260; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5261; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5262; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5263; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5264; GCN-HSA-NEXT:    s_mov_b32 s2, s7
5265; GCN-HSA-NEXT:    s_mov_b32 s8, s5
5266; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
5267; GCN-HSA-NEXT:    s_lshr_b32 s12, s4, 16
5268; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[4:5], 0x100000
5269; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5270; GCN-HSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5271; GCN-HSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5272; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5273; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5274; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5275; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5276; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5277; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5278; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5279; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5280; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5281; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5282; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5283; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5284; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5285; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5286; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5287; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5288; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5289; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5290; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5291; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
5292; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
5293; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
5294; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5295; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5296; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5297; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
5298; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
5299; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5300; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5301; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5302; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5303; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5304; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
5305; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
5306; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
5307; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
5308; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5309; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5310; GCN-HSA-NEXT:    s_endpgm
5311;
5312; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
5313; GCN-NOHSA-VI:       ; %bb.0:
5314; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5315; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5316; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
5317; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5318; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5319; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5320; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[6:7], 0x100000
5321; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5322; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
5323; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5324; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
5325; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5326; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5327; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s5
5328; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
5329; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5330; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
5331; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5332; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
5333; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[4:5], 0x100000
5334; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5335; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
5336; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5337; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5338; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5339; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5340; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
5341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
5342; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
5343; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5344; GCN-NOHSA-VI-NEXT:    s_nop 0
5345; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
5346; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
5347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
5348; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
5349; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5350; GCN-NOHSA-VI-NEXT:    s_nop 0
5351; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
5352; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
5353; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5354; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5355; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5356; GCN-NOHSA-VI-NEXT:    s_endpgm
5357;
5358; EG-LABEL: constant_sextload_v8i16_to_v8i64:
5359; EG:       ; %bb.0:
5360; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5361; EG-NEXT:    TEX 0 @8
5362; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
5363; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
5364; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
5365; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
5366; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
5367; EG-NEXT:    CF_END
5368; EG-NEXT:    Fetch clause starting at 8:
5369; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
5370; EG-NEXT:    ALU clause starting at 10:
5371; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5372; EG-NEXT:    ALU clause starting at 11:
5373; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
5374; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5375; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5376; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
5377; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
5378; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
5379; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5380; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5381; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5382; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
5383; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
5384; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5385; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5386; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
5387; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
5388; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
5389; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5390; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
5391; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
5392; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
5393; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
5394; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5395; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
5396; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
5397; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
5398; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5399; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
5400; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
5401; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
5402; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5403; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
5404; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
5405; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
5406; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test - one vector load, one sext of all eight lanes, one vector store.
5407  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
5408  %ext = sext <8 x i16> %load to <8 x i64>
5409  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
5410  ret void
5411}
5412
; Test kernel that loads a <16 x i16> vector through the addrspace(4) pointer
; %in, zero-extends every lane to i64 and stores the <16 x i64> result through
; the addrspace(1) pointer %out.
; The assertion lines inside the body are autogenerated (see the NOTE on the
; first line of this file), one group per check prefix named on the RUN lines.
; Regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand.
; In the expected GCN output the low halves come from s_and_b32 with 0xffff and
; the high halves from s_lshr_b32 by 16, while v1 and v3 are set to 0 once and
; reused as the upper dword of every stored i64. The r600 group uses AND_INT
; with 65535, LSHR by 16 and MOV of 0.0 for the zero lanes.
; Attribute group #0 is defined outside this part of the file.
; NOTE(review) the constant_ name prefix presumably refers to addrspace(4) -
; confirm against the AMDGPU address space table.
5413define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5414; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
5415; GCN-NOHSA-SI:       ; %bb.0:
5416; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5417; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5418; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5419; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5420; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5421; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5422; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5423; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5424; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s5, 16
5425; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s7, 16
5426; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s11, 16
5427; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s9, 16
5428; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s8, 16
5429; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s10, 16
5430; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s6, 16
5431; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s4, 16
5432; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
5433; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
5434; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
5435; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
5436; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
5437; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
5438; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
5439; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
5440; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
5441; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
5442; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5443; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5444; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
5445; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
5446; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5447; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5448; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
5449; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
5450; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5451; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5452; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5453; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
5454; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5455; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5456; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
5457; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
5458; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5459; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5460; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
5461; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
5462; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5463; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5464; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5465; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
5466; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5467; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5468; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
5469; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
5470; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5471; GCN-NOHSA-SI-NEXT:    s_endpgm
5472;
5473; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64:
5474; GCN-HSA:       ; %bb.0:
5475; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5476; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5477; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5478; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5479; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5480; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5481; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
5482; GCN-HSA-NEXT:    s_lshr_b32 s13, s7, 16
5483; GCN-HSA-NEXT:    s_lshr_b32 s14, s11, 16
5484; GCN-HSA-NEXT:    s_lshr_b32 s2, s9, 16
5485; GCN-HSA-NEXT:    s_lshr_b32 s15, s8, 16
5486; GCN-HSA-NEXT:    s_lshr_b32 s16, s10, 16
5487; GCN-HSA-NEXT:    s_lshr_b32 s17, s6, 16
5488; GCN-HSA-NEXT:    s_lshr_b32 s18, s4, 16
5489; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
5490; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
5491; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
5492; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
5493; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
5494; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
5495; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
5496; GCN-HSA-NEXT:    s_and_b32 s3, s9, 0xffff
5497; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5498; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
5499; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
5500; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5501; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5502; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5503; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
5504; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5505; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5506; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5507; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5508; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
5509; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
5510; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5511; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5512; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5513; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5514; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5515; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5516; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
5517; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
5518; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5519; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5520; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5521; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5522; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
5523; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
5524; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
5525; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5526; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5527; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5528; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5529; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
5530; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
5531; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
5532; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5533; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5534; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5535; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5536; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
5537; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
5538; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
5539; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5540; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5541; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5542; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5543; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s17
5544; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5545; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5546; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5547; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5548; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
5549; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5550; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5551; GCN-HSA-NEXT:    s_endpgm
5552;
5553; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64:
5554; GCN-NOHSA-VI:       ; %bb.0:
5555; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5556; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5557; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5558; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5559; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5560; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5561; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5562; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5563; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s9, 0xffff
5564; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s9, 16
5565; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s8, 0xffff
5566; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s8, 16
5567; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s19
5568; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
5569; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s11, 0xffff
5570; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s11, 16
5571; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5572; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s10, 0xffff
5573; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5574; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
5575; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s10, 16
5576; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
5577; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s7, 0xffff
5578; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s17
5579; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
5580; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s7, 16
5581; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5582; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s6, 0xffff
5583; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
5584; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
5585; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5586; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5587; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s5, 0xffff
5588; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s15
5589; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
5590; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s5, s5, 16
5591; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5592; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s4, 0xffff
5593; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
5594; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5595; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5596; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
5597; GCN-NOHSA-VI-NEXT:    s_nop 0
5598; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s13
5599; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
5600; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5601; GCN-NOHSA-VI-NEXT:    s_nop 0
5602; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
5603; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5604; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5605; GCN-NOHSA-VI-NEXT:    s_endpgm
5606;
5607; EG-LABEL: constant_zextload_v16i16_to_v16i64:
5608; EG:       ; %bb.0:
5609; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
5610; EG-NEXT:    TEX 1 @12
5611; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
5612; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
5613; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
5614; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
5615; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
5616; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
5617; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
5618; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
5619; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
5620; EG-NEXT:    CF_END
5621; EG-NEXT:    Fetch clause starting at 12:
5622; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
5623; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
5624; EG-NEXT:    ALU clause starting at 16:
5625; EG-NEXT:     MOV * T11.X, KC0[2].Z,
5626; EG-NEXT:    ALU clause starting at 17:
5627; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
5628; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5629; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
5630; EG-NEXT:     MOV T13.Y, 0.0,
5631; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
5632; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
5633; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5634; EG-NEXT:     MOV T14.Y, 0.0,
5635; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
5636; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5637; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
5638; EG-NEXT:     MOV T15.Y, 0.0,
5639; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
5640; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
5641; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5642; EG-NEXT:     MOV T12.Y, 0.0,
5643; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
5644; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5645; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
5646; EG-NEXT:     MOV T16.Y, 0.0,
5647; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
5648; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
5649; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5650; EG-NEXT:     MOV T17.Y, 0.0,
5651; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
5652; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5653; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
5654; EG-NEXT:     MOV T18.Y, 0.0,
5655; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
5656; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
5657; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5658; EG-NEXT:     MOV T11.Y, 0.0,
5659; EG-NEXT:     MOV T13.W, 0.0,
5660; EG-NEXT:     MOV * T14.W, 0.0,
5661; EG-NEXT:     MOV T15.W, 0.0,
5662; EG-NEXT:     MOV * T12.W, 0.0,
5663; EG-NEXT:     MOV T16.W, 0.0,
5664; EG-NEXT:     MOV * T17.W, 0.0,
5665; EG-NEXT:     MOV T18.W, 0.0,
5666; EG-NEXT:     MOV * T11.W, 0.0,
5667; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
5668; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5669; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5670; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
5671; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5672; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5673; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
5674; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5675; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5676; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
5677; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5678; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5679; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
5680; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5681; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5682; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
5683; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5684; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
5685; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
5686; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5687; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5688; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
5689; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test - one vector load, one zext of all sixteen lanes, one vector store.
5690  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
5691  %ext = zext <16 x i16> %load to <16 x i64>
5692  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
5693  ret void
5694}
5695
5696define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
5697; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
5698; GCN-NOHSA-SI:       ; %bb.0:
5699; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5700; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5701; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
5702; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5703; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5704; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5705; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, s11
5706; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, s9
5707; GCN-NOHSA-SI-NEXT:    s_mov_b32 s16, s7
5708; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s5
5709; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s10, 16
5710; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s8, 16
5711; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s6, 16
5712; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s4, 16
5713; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
5714; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
5715; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[34:35], s[8:9], 0x100000
5716; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[10:11], 0x100000
5717; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5718; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
5719; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5720; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5721; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5722; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5723; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
5724; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
5725; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
5726; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
5727; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
5728; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5729; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
5730; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
5731; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
5732; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
5733; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
5734; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5735; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
5736; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
5737; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
5738; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
5739; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5740; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5741; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
5742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
5743; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5744; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
5745; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5746; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5747; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
5748; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
5749; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5750; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5751; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5752; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5753; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
5754; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
5755; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s34
5756; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s35
5757; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s30
5758; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s31
5759; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
5760; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
5761; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
5762; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
5763; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
5764; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
5765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
5766; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
5767; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s24
5768; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
5769; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
5770; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s26
5771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s27
5772; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
5773; GCN-NOHSA-SI-NEXT:    s_endpgm
5774;
5775; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
5776; GCN-HSA:       ; %bb.0:
5777; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5778; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5779; GCN-HSA-NEXT:    s_load_dwordx8 s[8:15], s[2:3], 0x0
5780; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5781; GCN-HSA-NEXT:    s_mov_b32 s6, s15
5782; GCN-HSA-NEXT:    s_mov_b32 s16, s13
5783; GCN-HSA-NEXT:    s_mov_b32 s18, s11
5784; GCN-HSA-NEXT:    s_mov_b32 s20, s9
5785; GCN-HSA-NEXT:    s_lshr_b32 s22, s14, 16
5786; GCN-HSA-NEXT:    s_lshr_b32 s24, s12, 16
5787; GCN-HSA-NEXT:    s_lshr_b32 s26, s10, 16
5788; GCN-HSA-NEXT:    s_lshr_b32 s28, s8, 16
5789; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
5790; GCN-HSA-NEXT:    s_ashr_i64 s[14:15], s[14:15], 48
5791; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5792; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[8:9], 0x100000
5793; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[10:11], 0x100000
5794; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
5795; GCN-HSA-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
5796; GCN-HSA-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
5797; GCN-HSA-NEXT:    s_ashr_i64 s[12:13], s[12:13], 48
5798; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5799; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s7
5800; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5801; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
5802; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x100000
5803; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[26:27], 0x100000
5804; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
5805; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
5806; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
5807; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
5808; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
5809; GCN-HSA-NEXT:    s_add_u32 s26, s0, 0x70
5810; GCN-HSA-NEXT:    s_addc_u32 s27, s1, 0
5811; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s26
5812; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s12
5813; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0x50
5814; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s27
5815; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s13
5816; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
5817; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s12
5818; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
5819; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
5820; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s13
5821; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
5822; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5823; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
5824; GCN-HSA-NEXT:    s_add_u32 s10, s0, 48
5825; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
5826; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
5827; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
5828; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
5829; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
5830; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
5831; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5832; GCN-HSA-NEXT:    s_nop 0
5833; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
5834; GCN-HSA-NEXT:    s_add_u32 s8, s0, 16
5835; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
5836; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5837; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5838; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
5839; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
5840; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5841; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0x60
5842; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5843; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5844; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5845; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
5846; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s35
5847; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
5848; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s23
5849; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5850; GCN-HSA-NEXT:    s_add_u32 s8, s0, 64
5851; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5852; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
5853; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
5854; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s30
5855; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
5856; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
5857; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s25
5858; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
5859; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5860; GCN-HSA-NEXT:    s_nop 0
5861; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
5862; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
5863; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
5864; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
5865; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
5866; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
5867; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
5868; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
5869; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5870; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5871; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5872; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5873; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
5874; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
5875; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5876; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5877; GCN-HSA-NEXT:    s_endpgm
5878;
5879; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
5880; GCN-NOHSA-VI:       ; %bb.0:
5881; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
5882; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5883; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
5884; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, 0xf000
5885; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, -1
5886; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5887; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[4:5], 0x100000
5888; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
5889; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[4:5], 0x100000
5890; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s5
5891; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
5892; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
5893; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[4:5], 0x100000
5894; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s5, 16
5895; GCN-NOHSA-VI-NEXT:    s_mov_b32 s14, s1
5896; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s1, 16
5897; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[6:7], 0x100000
5898; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
5899; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5900; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[14:15], 0x100000
5901; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[18:19], 0x100000
5902; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[2:3], 0x100000
5903; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
5904; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[6:7], 0x100000
5905; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
5906; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
5907; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
5908; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5909; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
5910; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
5911; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, s3
5912; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5913; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
5914; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[2:3], 0x100000
5915; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
5916; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
5917; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
5918; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
5919; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s3, 16
5920; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
5921; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5922; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
5923; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
5924; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
5925; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
5926; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
5927; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[0:1], 0x100000
5928; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
5929; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
5930; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
5931; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
5932; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
5933; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s0, 16
5934; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
5935; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
5936; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
5937; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
5938; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
5939; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
5940; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
5941; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
5942; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
5943; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s21
5944; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
5945; GCN-NOHSA-VI-NEXT:    s_nop 0
5946; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
5947; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
5948; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
5949; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
5950; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
5951; GCN-NOHSA-VI-NEXT:    s_nop 0
5952; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
5953; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
5954; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
5955; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s1
5956; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
5957; GCN-NOHSA-VI-NEXT:    s_endpgm
5958;
5959; EG-LABEL: constant_sextload_v16i16_to_v16i64:
5960; EG:       ; %bb.0:
5961; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
5962; EG-NEXT:    TEX 1 @12
5963; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
5964; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
5965; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
5966; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
5967; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
5968; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
5969; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
5970; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
5971; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
5972; EG-NEXT:    CF_END
5973; EG-NEXT:    Fetch clause starting at 12:
5974; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
5975; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
5976; EG-NEXT:    ALU clause starting at 16:
5977; EG-NEXT:     MOV * T11.X, KC0[2].Z,
5978; EG-NEXT:    ALU clause starting at 17:
5979; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
5980; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5981; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5982; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
5983; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5984; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5985; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
5986; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5987; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5988; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
5989; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5990; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5991; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
5992; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5993; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5994; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
5995; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
5996; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
5997; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
5998; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5999; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6000; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
6001; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
6002; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6003; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6004; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
6005; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
6006; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
6007; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6008; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
6009; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
6010; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
6011; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
6012; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6013; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
6014; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
6015; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
6016; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
6017; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6018; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
6019; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
6020; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
6021; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
6022; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6023; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
6024; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
6025; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
6026; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
6027; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6028; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
6029; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
6030; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
6031; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
6032; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6033; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
6034; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
6035; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
6036; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6037; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
6038; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
6039; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6040; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6041; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
6042; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6043; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
6044; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6045  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
6046  %ext = sext <16 x i16> %load to <16 x i64>
6047  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
6048  ret void
6049}
6050
6051define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6052; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
6053; GCN-NOHSA-SI:       ; %bb.0:
6054; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
6055; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6056; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
6057; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6058; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s5, 16
6059; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s7, 16
6060; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s9, 16
6061; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s11, 16
6062; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s13, 16
6063; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s15, 16
6064; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s17, 16
6065; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s19, 16
6066; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s18, 16
6067; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s16, 16
6068; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s14, 16
6069; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s12, 16
6070; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s10, 16
6071; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s8, 16
6072; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s35, s6, 16
6073; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s36, s4, 16
6074; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
6075; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
6076; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
6077; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
6078; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
6079; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
6080; GCN-NOHSA-SI-NEXT:    s_and_b32 s16, s16, 0xffff
6081; GCN-NOHSA-SI-NEXT:    s_and_b32 s18, s18, 0xffff
6082; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
6083; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
6084; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
6085; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
6086; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
6087; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
6088; GCN-NOHSA-SI-NEXT:    s_and_b32 s17, s17, 0xffff
6089; GCN-NOHSA-SI-NEXT:    s_and_b32 s19, s19, 0xffff
6090; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6091; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
6092; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6093; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
6094; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s19
6095; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
6096; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6097; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6098; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s17
6099; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
6100; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6101; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6102; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
6103; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
6104; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6105; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6106; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
6107; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6108; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6109; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6110; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
6111; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
6112; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6113; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6114; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
6115; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
6116; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6117; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6118; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
6119; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
6120; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6121; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6122; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
6123; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
6124; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6125; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6126; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
6127; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s28
6128; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6129; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6130; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
6131; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
6132; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6133; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6134; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
6135; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s30
6136; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6137; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6138; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
6139; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
6140; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6141; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6142; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
6143; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s33
6144; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6145; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6146; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
6147; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s34
6148; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6149; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
6151; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
6152; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6153; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6154; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
6155; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s36
6156; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6157; GCN-NOHSA-SI-NEXT:    s_endpgm
6158;
6159; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64:
6160; GCN-HSA:       ; %bb.0:
6161; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6162; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
6163; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
6164; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6165; GCN-HSA-NEXT:    s_load_dwordx16 s[4:19], s[2:3], 0x0
6166; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6167; GCN-HSA-NEXT:    s_lshr_b32 s3, s5, 16
6168; GCN-HSA-NEXT:    s_lshr_b32 s20, s7, 16
6169; GCN-HSA-NEXT:    s_lshr_b32 s21, s9, 16
6170; GCN-HSA-NEXT:    s_lshr_b32 s22, s11, 16
6171; GCN-HSA-NEXT:    s_lshr_b32 s23, s13, 16
6172; GCN-HSA-NEXT:    s_lshr_b32 s24, s15, 16
6173; GCN-HSA-NEXT:    s_lshr_b32 s25, s17, 16
6174; GCN-HSA-NEXT:    s_lshr_b32 s26, s19, 16
6175; GCN-HSA-NEXT:    s_lshr_b32 s27, s18, 16
6176; GCN-HSA-NEXT:    s_lshr_b32 s28, s16, 16
6177; GCN-HSA-NEXT:    s_lshr_b32 s29, s14, 16
6178; GCN-HSA-NEXT:    s_lshr_b32 s30, s12, 16
6179; GCN-HSA-NEXT:    s_lshr_b32 s31, s10, 16
6180; GCN-HSA-NEXT:    s_lshr_b32 s33, s8, 16
6181; GCN-HSA-NEXT:    s_lshr_b32 s34, s6, 16
6182; GCN-HSA-NEXT:    s_lshr_b32 s2, s4, 16
6183; GCN-HSA-NEXT:    s_and_b32 s35, s4, 0xffff
6184; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
6185; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
6186; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
6187; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
6188; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
6189; GCN-HSA-NEXT:    s_and_b32 s16, s16, 0xffff
6190; GCN-HSA-NEXT:    s_and_b32 s18, s18, 0xffff
6191; GCN-HSA-NEXT:    s_and_b32 s36, s5, 0xffff
6192; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
6193; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
6194; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
6195; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
6196; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
6197; GCN-HSA-NEXT:    s_and_b32 s17, s17, 0xffff
6198; GCN-HSA-NEXT:    s_and_b32 s19, s19, 0xffff
6199; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xf0
6200; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6201; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6202; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6203; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xd0
6204; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6205; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s5
6206; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s4
6207; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xb0
6208; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6209; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
6210; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
6211; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x90
6212; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6213; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s19
6214; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s26
6215; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
6216; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6217; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
6218; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s17
6219; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s25
6220; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
6221; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x70
6222; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s15
6223; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
6224; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
6225; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6226; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s13
6227; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s23
6228; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6229; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
6230; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6231; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
6232; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
6233; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x50
6234; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6235; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6236; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6237; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s9
6238; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s21
6239; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6240; GCN-HSA-NEXT:    s_add_u32 s4, s0, 48
6241; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6242; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6243; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6244; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
6245; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s20
6246; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6247; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
6248; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6249; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6250; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6251; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
6252; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
6253; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6254; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xe0
6255; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6256; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6257; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6258; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
6259; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
6260; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6261; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xc0
6262; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6263; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6264; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6265; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
6266; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s28
6267; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6268; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xa0
6269; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6270; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6271; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6272; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
6273; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s29
6274; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6275; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x80
6276; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6277; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6278; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6279; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
6280; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s30
6281; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6282; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x60
6283; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6284; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6285; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6286; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
6287; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s31
6288; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6289; GCN-HSA-NEXT:    s_add_u32 s4, s0, 64
6290; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6291; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6292; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6293; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
6294; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s33
6295; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6296; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
6297; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6298; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6299; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6300; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
6301; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s34
6302; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6303; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6304; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6305; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s35
6306; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6307; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6308; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6309; GCN-HSA-NEXT:    s_endpgm
6310;
6311; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64:
6312; GCN-NOHSA-VI:       ; %bb.0:
6313; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
6314; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
6315; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
6316; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6317; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6318; GCN-NOHSA-VI-NEXT:    s_mov_b32 s19, 0xf000
6319; GCN-NOHSA-VI-NEXT:    s_mov_b32 s18, -1
6320; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6321; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s15, 16
6322; GCN-NOHSA-VI-NEXT:    s_and_b32 s15, s15, 0xffff
6323; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s13, 16
6324; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
6325; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s15
6326; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
6327; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s11, 16
6328; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
6329; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:240
6330; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s9, 16
6331; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s13
6332; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
6333; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
6334; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:208
6335; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s7, 16
6336; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s11
6337; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
6338; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
6339; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:176
6340; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s5, 16
6341; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s9
6342; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s24
6343; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
6344; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:144
6345; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s3, 16
6346; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s7
6347; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
6348; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
6349; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
6350; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s1, 16
6351; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
6352; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
6353; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s1, 0xffff
6354; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
6355; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s14, 16
6356; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
6357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
6358; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, 0xffff
6359; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
6360; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s12, 16
6361; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
6362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
6363; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
6364; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
6365; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s10, 16
6366; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
6367; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s28
6368; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
6369; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:224
6370; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s8, 16
6371; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
6372; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
6373; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
6374; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:192
6375; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s6, 16
6376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
6377; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
6378; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
6379; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:160
6380; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s4, 16
6381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
6382; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
6383; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
6384; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:128
6385; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s2, 16
6386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
6387; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s33
6388; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
6389; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
6390; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s0, 16
6391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
6392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
6393; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s0, 0xffff
6394; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
6395; GCN-NOHSA-VI-NEXT:    s_nop 0
6396; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
6397; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s35
6398; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
6399; GCN-NOHSA-VI-NEXT:    s_nop 0
6400; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
6401; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s36
6402; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
6403; GCN-NOHSA-VI-NEXT:    s_endpgm
6404;
6405; EG-LABEL: constant_zextload_v32i16_to_v32i64:
6406; EG:       ; %bb.0:
6407; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
6408; EG-NEXT:    TEX 2 @22
6409; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
6410; EG-NEXT:    TEX 0 @28
6411; EG-NEXT:    ALU 92, @65, KC0[CB0:0-32], KC1[]
6412; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
6413; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
6414; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
6415; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
6416; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
6417; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
6418; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
6419; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
6420; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
6421; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
6422; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
6423; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
6424; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
6425; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
6426; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
6427; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
6428; EG-NEXT:    CF_END
6429; EG-NEXT:    Fetch clause starting at 22:
6430; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
6431; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
6432; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
6433; EG-NEXT:    Fetch clause starting at 28:
6434; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
6435; EG-NEXT:    ALU clause starting at 30:
6436; EG-NEXT:     MOV * T19.X, KC0[2].Z,
6437; EG-NEXT:    ALU clause starting at 31:
6438; EG-NEXT:     LSHR * T23.Z, T20.W, literal.x,
6439; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6440; EG-NEXT:     AND_INT T23.X, T20.W, literal.x,
6441; EG-NEXT:     MOV T23.Y, 0.0,
6442; EG-NEXT:     LSHR T24.Z, T20.Z, literal.y,
6443; EG-NEXT:     AND_INT * T24.X, T20.Z, literal.x,
6444; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6445; EG-NEXT:     MOV T24.Y, 0.0,
6446; EG-NEXT:     LSHR * T25.Z, T20.Y, literal.x,
6447; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6448; EG-NEXT:     AND_INT T25.X, T20.Y, literal.x,
6449; EG-NEXT:     MOV T25.Y, 0.0,
6450; EG-NEXT:     LSHR T20.Z, T20.X, literal.y,
6451; EG-NEXT:     AND_INT * T20.X, T20.X, literal.x,
6452; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6453; EG-NEXT:     MOV T20.Y, 0.0,
6454; EG-NEXT:     LSHR * T26.Z, T22.W, literal.x,
6455; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6456; EG-NEXT:     AND_INT T26.X, T22.W, literal.x,
6457; EG-NEXT:     MOV T26.Y, 0.0,
6458; EG-NEXT:     LSHR T27.Z, T22.Z, literal.y,
6459; EG-NEXT:     AND_INT * T27.X, T22.Z, literal.x,
6460; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6461; EG-NEXT:     MOV T27.Y, 0.0,
6462; EG-NEXT:     LSHR * T28.Z, T22.Y, literal.x,
6463; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6464; EG-NEXT:     AND_INT T28.X, T22.Y, literal.x,
6465; EG-NEXT:     MOV T28.Y, 0.0,
6466; EG-NEXT:     LSHR T22.Z, T22.X, literal.y,
6467; EG-NEXT:     AND_INT * T22.X, T22.X, literal.x,
6468; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6469; EG-NEXT:     MOV T22.Y, 0.0,
6470; EG-NEXT:     LSHR * T19.Z, T21.W, literal.x,
6471; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6472; EG-NEXT:    ALU clause starting at 65:
6473; EG-NEXT:     AND_INT T19.X, T21.W, literal.x,
6474; EG-NEXT:     MOV T19.Y, 0.0,
6475; EG-NEXT:     LSHR T30.Z, T21.Z, literal.y,
6476; EG-NEXT:     AND_INT * T30.X, T21.Z, literal.x,
6477; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6478; EG-NEXT:     MOV T30.Y, 0.0,
6479; EG-NEXT:     LSHR * T31.Z, T21.Y, literal.x,
6480; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6481; EG-NEXT:     AND_INT T31.X, T21.Y, literal.x,
6482; EG-NEXT:     MOV T31.Y, 0.0,
6483; EG-NEXT:     LSHR T21.Z, T21.X, literal.y,
6484; EG-NEXT:     AND_INT * T21.X, T21.X, literal.x,
6485; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6486; EG-NEXT:     MOV T21.Y, 0.0,
6487; EG-NEXT:     LSHR * T32.Z, T29.W, literal.x,
6488; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6489; EG-NEXT:     AND_INT T32.X, T29.W, literal.x,
6490; EG-NEXT:     MOV T32.Y, 0.0,
6491; EG-NEXT:     LSHR T33.Z, T29.Z, literal.y,
6492; EG-NEXT:     AND_INT * T33.X, T29.Z, literal.x,
6493; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6494; EG-NEXT:     MOV T33.Y, 0.0,
6495; EG-NEXT:     LSHR * T34.Z, T29.Y, literal.x,
6496; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6497; EG-NEXT:     AND_INT T34.X, T29.Y, literal.x,
6498; EG-NEXT:     MOV T34.Y, 0.0,
6499; EG-NEXT:     LSHR T29.Z, T29.X, literal.y,
6500; EG-NEXT:     AND_INT * T29.X, T29.X, literal.x,
6501; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6502; EG-NEXT:     MOV T29.Y, 0.0,
6503; EG-NEXT:     MOV T23.W, 0.0,
6504; EG-NEXT:     MOV * T24.W, 0.0,
6505; EG-NEXT:     MOV T25.W, 0.0,
6506; EG-NEXT:     MOV * T20.W, 0.0,
6507; EG-NEXT:     MOV T26.W, 0.0,
6508; EG-NEXT:     MOV * T27.W, 0.0,
6509; EG-NEXT:     MOV T28.W, 0.0,
6510; EG-NEXT:     MOV * T22.W, 0.0,
6511; EG-NEXT:     MOV T19.W, 0.0,
6512; EG-NEXT:     MOV * T30.W, 0.0,
6513; EG-NEXT:     MOV T31.W, 0.0,
6514; EG-NEXT:     MOV * T21.W, 0.0,
6515; EG-NEXT:     MOV T32.W, 0.0,
6516; EG-NEXT:     MOV * T33.W, 0.0,
6517; EG-NEXT:     MOV T34.W, 0.0,
6518; EG-NEXT:     MOV * T29.W, 0.0,
6519; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
6520; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6521; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6522; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
6523; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6524; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6525; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
6526; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6527; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6528; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
6529; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6530; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6531; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
6532; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6533; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6534; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
6535; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6536; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6537; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
6538; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6539; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6540; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
6541; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6542; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
6543; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
6544; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6545; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
6546; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
6547; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6548; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
6549; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
6550; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6551; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
6552; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
6553; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6554; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
6555; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
6556; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6557; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
6558; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
6559; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6560; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
6561; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
6562; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6563; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
6564; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
6565; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6566  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
6567  %ext = zext <32 x i16> %load to <32 x i64>
6568  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
6569  ret void
6570}
6571
6572define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
6573; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
6574; GCN-NOHSA-SI:       ; %bb.0:
6575; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
6576; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6577; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6578; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6579; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s15
6580; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, s13
6581; GCN-NOHSA-SI-NEXT:    s_mov_b32 s36, s11
6582; GCN-NOHSA-SI-NEXT:    s_mov_b32 s40, s9
6583; GCN-NOHSA-SI-NEXT:    s_mov_b32 s44, s7
6584; GCN-NOHSA-SI-NEXT:    s_mov_b32 s46, s5
6585; GCN-NOHSA-SI-NEXT:    s_mov_b32 s38, s3
6586; GCN-NOHSA-SI-NEXT:    s_mov_b32 s42, s1
6587; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s14, 16
6588; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s12, 16
6589; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s10, 16
6590; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s8, 16
6591; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[48:49], s[20:21], 0x100000
6592; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[50:51], s[18:19], 0x100000
6593; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s6, 16
6594; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s54, s4, 16
6595; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s2, 16
6596; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s0, 16
6597; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
6598; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
6599; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
6600; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
6601; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[60:61], s[8:9], 0x100000
6602; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[62:63], s[10:11], 0x100000
6603; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[64:65], s[12:13], 0x100000
6604; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[66:67], s[14:15], 0x100000
6605; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[68:69], s[0:1], 48
6606; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[70:71], s[2:3], 48
6607; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
6608; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
6609; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
6610; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[2:3], s[12:13], 48
6611; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[12:13], s[14:15], 48
6612; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
6613; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
6614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
6615; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s50
6616; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s51
6617; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6618; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
6619; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s48
6620; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s49
6621; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s2
6622; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s3
6623; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6624; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6625; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[46:47], 0x100000
6626; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[44:45], 0x100000
6627; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[40:41], 0x100000
6628; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x100000
6629; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[40:41], s[42:43], 0x100000
6630; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x100000
6631; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s36
6632; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s37
6633; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s10
6634; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s11
6635; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s16
6636; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s17
6637; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s8
6638; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s9
6639; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s14
6640; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s15
6641; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s6
6642; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s7
6643; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s12
6644; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s13
6645; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s4
6646; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s5
6647; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6648; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[58:59], 0x100000
6649; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[56:57], 0x100000
6650; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[54:55], 0x100000
6651; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[52:53], 0x100000
6652; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[34:35], 0x100000
6653; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[28:29], 0x100000
6654; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[26:27], 0x100000
6655; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
6656; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
6657; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
6658; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
6659; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
6660; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
6661; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
6662; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
6663; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s39
6664; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s70
6665; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s71
6666; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6667; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6668; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
6669; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s41
6670; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s68
6671; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
6672; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6673; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6674; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s66
6675; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s67
6676; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s64
6677; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s65
6678; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s62
6679; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s63
6680; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s60
6681; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s61
6682; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s30
6683; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s31
6684; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s22
6685; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s23
6686; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, s20
6687; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, s21
6688; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
6689; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
6690; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6691; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6692; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
6693; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
6694; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s16
6695; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s17
6696; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
6697; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s14
6698; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s15
6699; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
6700; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s12
6701; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s13
6702; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
6703; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s10
6704; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s11
6705; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
6706; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s8
6707; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s9
6708; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
6709; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, s6
6710; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, s7
6711; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
6712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6713; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
6714; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6715; GCN-NOHSA-SI-NEXT:    s_endpgm
6716;
6717; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
6718; GCN-HSA:       ; %bb.0:
6719; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x0
6720; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6721; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6722; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6723; GCN-HSA-NEXT:    s_mov_b32 s42, s15
6724; GCN-HSA-NEXT:    s_mov_b32 s44, s13
6725; GCN-HSA-NEXT:    s_mov_b32 s46, s11
6726; GCN-HSA-NEXT:    s_mov_b32 s48, s9
6727; GCN-HSA-NEXT:    s_mov_b32 s50, s7
6728; GCN-HSA-NEXT:    s_mov_b32 s52, s5
6729; GCN-HSA-NEXT:    s_mov_b32 s54, s3
6730; GCN-HSA-NEXT:    s_mov_b32 s56, s1
6731; GCN-HSA-NEXT:    s_lshr_b32 s58, s14, 16
6732; GCN-HSA-NEXT:    s_lshr_b32 s60, s12, 16
6733; GCN-HSA-NEXT:    s_lshr_b32 s62, s10, 16
6734; GCN-HSA-NEXT:    s_lshr_b32 s64, s8, 16
6735; GCN-HSA-NEXT:    s_lshr_b32 s66, s6, 16
6736; GCN-HSA-NEXT:    s_lshr_b32 s68, s4, 16
6737; GCN-HSA-NEXT:    s_lshr_b32 s70, s2, 16
6738; GCN-HSA-NEXT:    s_lshr_b32 s72, s0, 16
6739; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
6740; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
6741; GCN-HSA-NEXT:    s_ashr_i64 s[36:37], s[0:1], 48
6742; GCN-HSA-NEXT:    s_ashr_i64 s[38:39], s[2:3], 48
6743; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[14:15], 48
6744; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[42:43], 0x100000
6745; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
6746; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
6747; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
6748; GCN-HSA-NEXT:    s_bfe_i64 s[28:29], s[10:11], 0x100000
6749; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
6750; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
6751; GCN-HSA-NEXT:    s_ashr_i64 s[40:41], s[4:5], 48
6752; GCN-HSA-NEXT:    s_ashr_i64 s[74:75], s[6:7], 48
6753; GCN-HSA-NEXT:    s_ashr_i64 s[76:77], s[8:9], 48
6754; GCN-HSA-NEXT:    s_ashr_i64 s[78:79], s[10:11], 48
6755; GCN-HSA-NEXT:    s_ashr_i64 s[80:81], s[12:13], 48
6756; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6757; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6758; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
6759; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
6760; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[72:73], 0x100000
6761; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[70:71], 0x100000
6762; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[68:69], 0x100000
6763; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[66:67], 0x100000
6764; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[64:65], 0x100000
6765; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[62:63], 0x100000
6766; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[60:61], 0x100000
6767; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[58:59], 0x100000
6768; GCN-HSA-NEXT:    s_bfe_i64 s[42:43], s[56:57], 0x100000
6769; GCN-HSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
6770; GCN-HSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x100000
6771; GCN-HSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x100000
6772; GCN-HSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
6773; GCN-HSA-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x100000
6774; GCN-HSA-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
6775; GCN-HSA-NEXT:    s_add_u32 s56, s16, 0xf0
6776; GCN-HSA-NEXT:    s_addc_u32 s57, s17, 0
6777; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s44
6778; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0xd0
6779; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s45
6780; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6781; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s44
6782; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s45
6783; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0xb0
6784; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6785; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s44
6786; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s45
6787; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0x90
6788; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6789; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s44
6790; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s56
6791; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s45
6792; GCN-HSA-NEXT:    s_add_u32 s44, s16, 0x70
6793; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s57
6794; GCN-HSA-NEXT:    s_addc_u32 s45, s17, 0
6795; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
6796; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s40
6797; GCN-HSA-NEXT:    s_add_u32 s40, s16, 0x50
6798; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s41
6799; GCN-HSA-NEXT:    s_addc_u32 s41, s17, 0
6800; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s80
6801; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s81
6802; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s38
6803; GCN-HSA-NEXT:    s_add_u32 s38, s16, 48
6804; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s39
6805; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
6806; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
6807; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s38
6808; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s39
6809; GCN-HSA-NEXT:    s_add_u32 s38, s16, 16
6810; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s46
6811; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s47
6812; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s78
6813; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s79
6814; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
6815; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
6816; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s48
6817; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s14
6818; GCN-HSA-NEXT:    s_add_u32 s14, s16, 0xe0
6819; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s49
6820; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s76
6821; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s77
6822; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s15
6823; GCN-HSA-NEXT:    s_addc_u32 s15, s17, 0
6824; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s44
6825; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
6826; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s50
6827; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s12
6828; GCN-HSA-NEXT:    s_add_u32 s12, s16, 0xc0
6829; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s51
6830; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s74
6831; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s75
6832; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s45
6833; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s40
6834; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s13
6835; GCN-HSA-NEXT:    s_addc_u32 s13, s17, 0
6836; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s52
6837; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s53
6838; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s54
6839; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s55
6840; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s41
6841; GCN-HSA-NEXT:    v_mov_b32_e32 v34, s38
6842; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
6843; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s42
6844; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s15
6845; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s13
6846; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s43
6847; GCN-HSA-NEXT:    v_mov_b32_e32 v35, s39
6848; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s36
6849; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s37
6850; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s34
6851; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s35
6852; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s30
6853; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s31
6854; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s14
6855; GCN-HSA-NEXT:    flat_store_dwordx4 v[32:33], v[20:23]
6856; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s12
6857; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
6858; GCN-HSA-NEXT:    flat_store_dwordx4 v[34:35], v[4:7]
6859; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6860; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
6861; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6862; GCN-HSA-NEXT:    s_add_u32 s10, s16, 0xa0
6863; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
6864; GCN-HSA-NEXT:    s_addc_u32 s11, s17, 0
6865; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
6866; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
6867; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
6868; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
6869; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6870; GCN-HSA-NEXT:    s_nop 0
6871; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6872; GCN-HSA-NEXT:    s_add_u32 s8, s16, 0x80
6873; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
6874; GCN-HSA-NEXT:    s_addc_u32 s9, s17, 0
6875; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
6876; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
6877; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
6878; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
6879; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6880; GCN-HSA-NEXT:    s_nop 0
6881; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6882; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x60
6883; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
6884; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
6885; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
6886; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
6887; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
6888; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
6889; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6890; GCN-HSA-NEXT:    s_nop 0
6891; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
6892; GCN-HSA-NEXT:    s_add_u32 s4, s16, 64
6893; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
6894; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
6895; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
6896; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
6897; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
6898; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
6899; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6900; GCN-HSA-NEXT:    s_nop 0
6901; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6902; GCN-HSA-NEXT:    s_add_u32 s2, s16, 32
6903; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
6904; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
6905; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6906; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
6907; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
6908; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6909; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6910; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
6911; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
6912; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
6913; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
6914; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
6915; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
6916; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6917; GCN-HSA-NEXT:    s_endpgm
6918;
6919; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
6920; GCN-NOHSA-VI:       ; %bb.0:
6921; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
6922; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6923; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
6924; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6925; GCN-NOHSA-VI-NEXT:    s_mov_b32 s36, s15
6926; GCN-NOHSA-VI-NEXT:    s_mov_b32 s38, s13
6927; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[82:83], s[14:15], 48
6928; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x100000
6929; GCN-NOHSA-VI-NEXT:    s_mov_b32 s40, s11
6930; GCN-NOHSA-VI-NEXT:    s_mov_b32 s48, s3
6931; GCN-NOHSA-VI-NEXT:    s_mov_b32 s50, s1
6932; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s64, s2, 16
6933; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s66, s0, 16
6934; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
6935; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
6936; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[68:69], s[0:1], 48
6937; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[70:71], s[2:3], 48
6938; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[80:81], s[12:13], 48
6939; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6940; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6941; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s16
6942; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s17
6943; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x100000
6944; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
6945; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
6946; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s82
6947; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s83
6948; GCN-NOHSA-VI-NEXT:    s_mov_b32 s42, s9
6949; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[78:79], s[10:11], 48
6950; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x100000
6951; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6952; GCN-NOHSA-VI-NEXT:    s_mov_b32 s44, s7
6953; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s38
6954; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s39
6955; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s80
6956; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s81
6957; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[76:77], s[8:9], 48
6958; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x100000
6959; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6960; GCN-NOHSA-VI-NEXT:    s_mov_b32 s46, s5
6961; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s40
6962; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s41
6963; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s78
6964; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s79
6965; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[74:75], s[6:7], 48
6966; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
6967; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6968; GCN-NOHSA-VI-NEXT:    s_ashr_i64 s[72:73], s[4:5], 48
6969; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s42
6970; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s43
6971; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s76
6972; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s77
6973; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x100000
6974; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6975; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
6976; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s44
6977; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s45
6978; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s74
6979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s75
6980; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6981; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s14, 16
6982; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s46
6983; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s47
6984; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s72
6985; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s73
6986; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x100000
6987; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6988; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s12, 16
6989; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
6990; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
6991; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s70
6992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s71
6993; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
6994; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x100000
6995; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6996; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s10, 16
6997; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s50
6998; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s51
6999; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s68
7000; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s69
7001; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
7002; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[54:55], 0x100000
7003; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7004; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s8, 16
7005; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s34
7006; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s35
7007; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s52
7008; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
7009; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[10:11], 0x100000
7010; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[56:57], 0x100000
7011; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
7012; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s60, s6, 16
7013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
7014; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
7015; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
7016; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s17
7017; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
7018; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[58:59], 0x100000
7019; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
7020; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s4, 16
7021; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
7022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
7023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
7024; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
7025; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
7026; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[60:61], 0x100000
7027; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
7028; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
7029; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
7030; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
7031; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
7032; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
7033; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[62:63], 0x100000
7034; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
7035; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[64:65], 0x100000
7036; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
7037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
7038; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
7039; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
7040; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7041; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[66:67], 0x100000
7042; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
7043; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
7044; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
7045; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
7046; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
7047; GCN-NOHSA-VI-NEXT:    s_nop 0
7048; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
7049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
7050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
7051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
7052; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7053; GCN-NOHSA-VI-NEXT:    s_nop 0
7054; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
7055; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
7056; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
7057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
7058; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7059; GCN-NOHSA-VI-NEXT:    s_endpgm
7060;
7061; EG-LABEL: constant_sextload_v32i16_to_v32i64:
7062; EG:       ; %bb.0:
7063; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7064; EG-NEXT:    TEX 0 @22
7065; EG-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
7066; EG-NEXT:    TEX 2 @24
7067; EG-NEXT:    ALU 74, @87, KC0[CB0:0-32], KC1[]
7068; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
7069; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
7070; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0
7071; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0
7072; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
7073; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
7074; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
7075; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0
7076; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
7077; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
7078; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
7079; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0
7080; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
7081; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
7082; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
7083; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
7084; EG-NEXT:    CF_END
7085; EG-NEXT:    Fetch clause starting at 22:
7086; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
7087; EG-NEXT:    Fetch clause starting at 24:
7088; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
7089; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
7090; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
7091; EG-NEXT:    ALU clause starting at 30:
7092; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7093; EG-NEXT:    ALU clause starting at 31:
7094; EG-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
7095; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7096; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7097; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
7098; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7099; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7100; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
7101; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7102; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7103; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
7104; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7105; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7106; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
7107; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7108; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7109; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
7110; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7111; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7112; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
7113; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7114; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7115; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
7116; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7117; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7118; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
7119; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7120; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7121; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
7122; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7123; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7124; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
7125; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7126; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7127; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
7128; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7129; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7130; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
7131; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7132; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7133; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
7134; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7135; EG-NEXT:     ASHR * T35.W, T20.X, literal.z,
7136; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7137; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7138; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
7139; EG-NEXT:     ASHR T35.Z, T20.X, literal.y,
7140; EG-NEXT:     ASHR * T37.W, T20.Y, literal.z,
7141; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7142; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7143; EG-NEXT:     BFE_INT T35.X, T20.X, 0.0, literal.x,
7144; EG-NEXT:     ASHR * T37.Z, T20.Y, literal.x,
7145; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7146; EG-NEXT:     BFE_INT T37.X, T20.Y, 0.0, literal.x,
7147; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
7148; EG-NEXT:     ASHR * T19.W, T20.Z, literal.y,
7149; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7150; EG-NEXT:    ALU clause starting at 87:
7151; EG-NEXT:     ASHR T19.Z, T20.Z, literal.x,
7152; EG-NEXT:     ASHR * T41.W, T20.W, literal.y,
7153; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7154; EG-NEXT:     BFE_INT T19.X, T20.Z, 0.0, literal.x,
7155; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
7156; EG-NEXT:     ASHR T41.Z, T20.W, literal.x,
7157; EG-NEXT:     ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212
7158; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7159; EG-NEXT:     BFE_INT T41.X, T20.W, 0.0, literal.x,
7160; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7161; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
7162; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
7163; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7164; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
7165; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
7166; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
7167; EG-NEXT:     ASHR * T43.W, T40.Z, literal.y,
7168; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7169; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
7170; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
7171; EG-NEXT:     ASHR T43.Z, T40.Z, literal.x,
7172; EG-NEXT:     ASHR * T44.W, T40.W, literal.y,
7173; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7174; EG-NEXT:     BFE_INT T43.X, T40.Z, 0.0, literal.x,
7175; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
7176; EG-NEXT:     ASHR T44.Z, T40.W, literal.x,
7177; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
7178; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7179; EG-NEXT:     BFE_INT T44.X, T40.W, 0.0, literal.x,
7180; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
7181; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
7182; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
7183; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7184; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
7185; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
7186; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
7187; EG-NEXT:     ASHR * T46.W, T39.Z, literal.y,
7188; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7189; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
7190; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
7191; EG-NEXT:     ASHR T46.Z, T39.Z, literal.x,
7192; EG-NEXT:     ASHR * T47.W, T39.W, literal.y,
7193; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7194; EG-NEXT:     BFE_INT T46.X, T39.Z, 0.0, literal.x,
7195; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
7196; EG-NEXT:     ASHR T47.Z, T39.W, literal.x,
7197; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
7198; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7199; EG-NEXT:     BFE_INT T47.X, T39.W, 0.0, literal.x,
7200; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
7201; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
7202; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
7203; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7204; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
7205; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
7206; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
7207; EG-NEXT:     ASHR * T49.W, T38.Z, literal.y,
7208; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7209; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
7210; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
7211; EG-NEXT:     ASHR T49.Z, T38.Z, literal.x,
7212; EG-NEXT:     ASHR * T50.W, T38.W, literal.y,
7213; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7214; EG-NEXT:     BFE_INT T49.X, T38.Z, 0.0, literal.x,
7215; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
7216; EG-NEXT:     ASHR * T50.Z, T38.W, literal.x,
7217; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7218; EG-NEXT:     BFE_INT T50.X, T38.W, 0.0, literal.x,
7219; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
7220; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7221; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7222; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
7223; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7224; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
7225; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7226  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
7227  %ext = sext <32 x i16> %load to <32 x i64>
7228  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
7229  ret void
7230}
7231
; FIXME: The two v64i16 -> v64i64 extending-load tests below are commented out
; because they trigger "undefined register" machine verifier errors.
7233
7234; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7235;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7236;   %ext = zext <64 x i16> %load to <64 x i64>
7237;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7238;   ret void
7239; }
7240
7241; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
7242;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
7243;   %ext = sext <64 x i16> %load to <64 x i64>
7244;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
7245;   ret void
7246; }
7247
7248attributes #0 = { nounwind }
7249