1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s
3; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s
4; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s
5; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s
6; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s
7
8; FIXME: r600 is broken because the bigger testcases spill, and spilling is not implemented there
9
; Copies one i16 from %in to %out, both addrspace(1) pointers.
; Expected output per the checks below: the two non-HSA GCN runs use
; buffer_load_ushort / buffer_store_short, the HSA run uses flat_load_ushort /
; flat_store_short, and the r600 runs (prefixes EG and CM) read with
; VTX_READ_16 and write through a MEM_RAT MSKOR store.
10define amdgpu_kernel void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
11; GCN-NOHSA-SI-LABEL: global_load_i16:
12; GCN-NOHSA-SI:       ; %bb.0: ; %entry
13; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
14; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
15; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
16; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
17; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
18; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
19; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
20; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
21; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
22; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
23; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
24; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
25; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
26; GCN-NOHSA-SI-NEXT:    s_endpgm
27;
28; GCN-HSA-LABEL: global_load_i16:
29; GCN-HSA:       ; %bb.0: ; %entry
30; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
31; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
32; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
33; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
34; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
35; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
36; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
37; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
38; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
39; GCN-HSA-NEXT:    s_endpgm
40;
41; GCN-NOHSA-VI-LABEL: global_load_i16:
42; GCN-NOHSA-VI:       ; %bb.0: ; %entry
43; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
44; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
45; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
46; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
47; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
48; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
49; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
50; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
51; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
52; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
53; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
54; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
55; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
56; GCN-NOHSA-VI-NEXT:    s_endpgm
57;
58; EG-LABEL: global_load_i16:
59; EG:       ; %bb.0: ; %entry
60; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
61; EG-NEXT:    TEX 0 @6
62; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
63; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
64; EG-NEXT:    CF_END
65; EG-NEXT:    PAD
66; EG-NEXT:    Fetch clause starting at 6:
67; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
68; EG-NEXT:    ALU clause starting at 8:
69; EG-NEXT:     MOV * T0.X, KC0[2].Z,
70; EG-NEXT:    ALU clause starting at 9:
71; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
72; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
73; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
74; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
75; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
76; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
77; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
78; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
79; EG-NEXT:     MOV T0.Y, 0.0,
80; EG-NEXT:     MOV * T0.Z, 0.0,
81; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
82; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
83;
84; CM-LABEL: global_load_i16:
85; CM:       ; %bb.0: ; %entry
86; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
87; CM-NEXT:    TEX 0 @6
88; CM-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
89; CM-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
90; CM-NEXT:    CF_END
91; CM-NEXT:    PAD
92; CM-NEXT:    Fetch clause starting at 6:
93; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
94; CM-NEXT:    ALU clause starting at 8:
95; CM-NEXT:     MOV * T0.X, KC0[2].Z,
96; CM-NEXT:    ALU clause starting at 9:
97; CM-NEXT:     AND_INT * T0.W, KC0[2].Y, literal.x,
98; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
99; CM-NEXT:     AND_INT T0.Z, T0.X, literal.x,
100; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
101; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
102; CM-NEXT:     LSHL T0.X, PV.Z, PV.W,
103; CM-NEXT:     LSHL * T0.W, literal.x, PV.W,
104; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
105; CM-NEXT:     MOV T0.Y, 0.0,
106; CM-NEXT:     MOV * T0.Z, 0.0,
107; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
108; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
109entry:
110  %ld = load i16, i16 addrspace(1)* %in
111  store i16 %ld, i16 addrspace(1)* %out
112  ret void
113}
114
; Copies one <2 x i16> from %in to %out, both addrspace(1) pointers.
; The checks below expect a single 32-bit access in each direction:
; buffer_load_dword / buffer_store_dword on the non-HSA GCN runs,
; flat_load_dword / flat_store_dword on the HSA run, and VTX_READ_32 with a
; MEM_RAT_CACHELESS store on the r600 runs (prefixes EG and CM).
115define amdgpu_kernel void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
116; GCN-NOHSA-SI-LABEL: global_load_v2i16:
117; GCN-NOHSA-SI:       ; %bb.0: ; %entry
118; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
119; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
120; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
121; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
122; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
123; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
124; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
125; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
126; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
127; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
128; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
129; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
130; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
131; GCN-NOHSA-SI-NEXT:    s_endpgm
132;
133; GCN-HSA-LABEL: global_load_v2i16:
134; GCN-HSA:       ; %bb.0: ; %entry
135; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
136; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
137; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
138; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
139; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
140; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
141; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
142; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
143; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
144; GCN-HSA-NEXT:    s_endpgm
145;
146; GCN-NOHSA-VI-LABEL: global_load_v2i16:
147; GCN-NOHSA-VI:       ; %bb.0: ; %entry
148; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
149; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
150; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
151; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
152; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
153; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
154; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
155; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
156; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
157; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
158; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
159; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
160; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
161; GCN-NOHSA-VI-NEXT:    s_endpgm
162;
163; EG-LABEL: global_load_v2i16:
164; EG:       ; %bb.0: ; %entry
165; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
166; EG-NEXT:    TEX 0 @6
167; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
168; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
169; EG-NEXT:    CF_END
170; EG-NEXT:    PAD
171; EG-NEXT:    Fetch clause starting at 6:
172; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
173; EG-NEXT:    ALU clause starting at 8:
174; EG-NEXT:     MOV * T0.X, KC0[2].Z,
175; EG-NEXT:    ALU clause starting at 9:
176; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
177; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
178;
179; CM-LABEL: global_load_v2i16:
180; CM:       ; %bb.0: ; %entry
181; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
182; CM-NEXT:    TEX 0 @6
183; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
184; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
185; CM-NEXT:    CF_END
186; CM-NEXT:    PAD
187; CM-NEXT:    Fetch clause starting at 6:
188; CM-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
189; CM-NEXT:    ALU clause starting at 8:
190; CM-NEXT:     MOV * T0.X, KC0[2].Z,
191; CM-NEXT:    ALU clause starting at 9:
192; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
193; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
194entry:
195  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
196  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
197  ret void
198}
199
; Copies one <3 x i16> from %in to %out, both addrspace(1) pointers.
; The checks below expect the GCN runs to load a dwordx2 and then store the
; third element as a short at byte offset 4 plus a dword for the first two
; (the HSA run forms the +4 address with s_add_u32 / s_addc_u32). The r600
; runs (prefixes EG and CM) issue three VTX_READ_16 fetches, repack the first
; two elements with LSHL / AND_INT / OR_INT for a MEM_RAT_CACHELESS store, and
; write the third element with a MEM_RAT MSKOR store.
200define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
201; GCN-NOHSA-SI-LABEL: global_load_v3i16:
202; GCN-NOHSA-SI:       ; %bb.0: ; %entry
203; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
204; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
205; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
206; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
207; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
208; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
209; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
210; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
211; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
212; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
213; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
214; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
215; GCN-NOHSA-SI-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
216; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
217; GCN-NOHSA-SI-NEXT:    s_endpgm
218;
219; GCN-HSA-LABEL: global_load_v3i16:
220; GCN-HSA:       ; %bb.0: ; %entry
221; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
222; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
223; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
224; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
225; GCN-HSA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
226; GCN-HSA-NEXT:    s_add_u32 s2, s0, 4
227; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
228; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
229; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
230; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
231; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
232; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
233; GCN-HSA-NEXT:    flat_store_short v[4:5], v1
234; GCN-HSA-NEXT:    flat_store_dword v[2:3], v0
235; GCN-HSA-NEXT:    s_endpgm
236;
237; GCN-NOHSA-VI-LABEL: global_load_v3i16:
238; GCN-NOHSA-VI:       ; %bb.0: ; %entry
239; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
240; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
241; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
242; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
243; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
244; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
245; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
246; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
247; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
248; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
249; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
250; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
251; GCN-NOHSA-VI-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
252; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
253; GCN-NOHSA-VI-NEXT:    s_endpgm
254;
255; EG-LABEL: global_load_v3i16:
256; EG:       ; %bb.0: ; %entry
257; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
258; EG-NEXT:    TEX 2 @6
259; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
260; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
261; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
262; EG-NEXT:    CF_END
263; EG-NEXT:    Fetch clause starting at 6:
264; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
265; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
266; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
267; EG-NEXT:    ALU clause starting at 12:
268; EG-NEXT:     MOV * T5.X, KC0[2].Z,
269; EG-NEXT:    ALU clause starting at 13:
270; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
271; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
272; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
273; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
274; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
275; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
276; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
277; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
278; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
279; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
280; EG-NEXT:     MOV T5.Y, 0.0,
281; EG-NEXT:     MOV * T5.Z, 0.0,
282; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
283; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
284; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
285; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
286; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
287; EG-NEXT:     OR_INT T6.X, PV.W, PS,
288; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
289; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
290;
291; CM-LABEL: global_load_v3i16:
292; CM:       ; %bb.0: ; %entry
293; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
294; CM-NEXT:    TEX 2 @6
295; CM-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
296; CM-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
297; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
298; CM-NEXT:    CF_END
299; CM-NEXT:    Fetch clause starting at 6:
300; CM-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
301; CM-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
302; CM-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
303; CM-NEXT:    ALU clause starting at 12:
304; CM-NEXT:     MOV * T5.X, KC0[2].Z,
305; CM-NEXT:    ALU clause starting at 13:
306; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
307; CM-NEXT:    4(5.605194e-45), 0(0.000000e+00)
308; CM-NEXT:     AND_INT * T1.W, PV.W, literal.x,
309; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
310; CM-NEXT:     AND_INT T0.Z, T5.X, literal.x,
311; CM-NEXT:     LSHL * T1.W, PV.W, literal.y,
312; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
313; CM-NEXT:     LSHL T5.X, PV.Z, PV.W,
314; CM-NEXT:     LSHL * T5.W, literal.x, PV.W,
315; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
316; CM-NEXT:     MOV T5.Y, 0.0,
317; CM-NEXT:     MOV * T5.Z, 0.0,
318; CM-NEXT:     LSHL T0.Z, T7.X, literal.x,
319; CM-NEXT:     AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
320; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
321; CM-NEXT:     OR_INT * T6.X, PV.Z, PV.W,
322; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
323; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
324; CM-NEXT:     LSHR * T8.X, T0.W, literal.x,
325; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
326entry:
327  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
328  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
329  ret void
330}
331
; Copies one <4 x i16> from %in to %out, both addrspace(1) pointers.
; The checks below expect a single 64-bit access in each direction:
; buffer_load_dwordx2 / buffer_store_dwordx2 on the non-HSA GCN runs,
; flat_load_dwordx2 / flat_store_dwordx2 on the HSA run, and VTX_READ_64 with
; a MEM_RAT_CACHELESS store on the r600 runs (prefixes EG and CM).
332define amdgpu_kernel void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
333; GCN-NOHSA-SI-LABEL: global_load_v4i16:
334; GCN-NOHSA-SI:       ; %bb.0: ; %entry
335; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
336; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
337; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
338; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
339; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
340; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
341; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
342; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
343; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
344; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
346; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
347; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
348; GCN-NOHSA-SI-NEXT:    s_endpgm
349;
350; GCN-HSA-LABEL: global_load_v4i16:
351; GCN-HSA:       ; %bb.0: ; %entry
352; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
353; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
354; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
355; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
356; GCN-HSA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
357; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
358; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
359; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
360; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
361; GCN-HSA-NEXT:    s_endpgm
362;
363; GCN-NOHSA-VI-LABEL: global_load_v4i16:
364; GCN-NOHSA-VI:       ; %bb.0: ; %entry
365; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
366; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
367; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
368; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
369; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
370; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
371; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
372; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
373; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
374; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
375; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
376; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
377; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
378; GCN-NOHSA-VI-NEXT:    s_endpgm
379;
380; EG-LABEL: global_load_v4i16:
381; EG:       ; %bb.0: ; %entry
382; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
383; EG-NEXT:    TEX 0 @6
384; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
385; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
386; EG-NEXT:    CF_END
387; EG-NEXT:    PAD
388; EG-NEXT:    Fetch clause starting at 6:
389; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
390; EG-NEXT:    ALU clause starting at 8:
391; EG-NEXT:     MOV * T0.X, KC0[2].Z,
392; EG-NEXT:    ALU clause starting at 9:
393; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
394; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
395;
396; CM-LABEL: global_load_v4i16:
397; CM:       ; %bb.0: ; %entry
398; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
399; CM-NEXT:    TEX 0 @6
400; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
401; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
402; CM-NEXT:    CF_END
403; CM-NEXT:    PAD
404; CM-NEXT:    Fetch clause starting at 6:
405; CM-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
406; CM-NEXT:    ALU clause starting at 8:
407; CM-NEXT:     MOV * T0.X, KC0[2].Z,
408; CM-NEXT:    ALU clause starting at 9:
409; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
410; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
411entry:
412  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
413  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
414  ret void
415}
416
; Copies one <8 x i16> from %in to %out, both addrspace(1) pointers.
; The checks below expect a single 128-bit access in each direction:
; buffer_load_dwordx4 / buffer_store_dwordx4 on the non-HSA GCN runs,
; flat_load_dwordx4 / flat_store_dwordx4 on the HSA run, and VTX_READ_128 with
; a MEM_RAT_CACHELESS store on the r600 runs (prefixes EG and CM).
417define amdgpu_kernel void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
418; GCN-NOHSA-SI-LABEL: global_load_v8i16:
419; GCN-NOHSA-SI:       ; %bb.0: ; %entry
420; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
421; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
422; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
423; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
424; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
425; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
426; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
427; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
428; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
429; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
430; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
431; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
432; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
433; GCN-NOHSA-SI-NEXT:    s_endpgm
434;
435; GCN-HSA-LABEL: global_load_v8i16:
436; GCN-HSA:       ; %bb.0: ; %entry
437; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
438; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
439; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
440; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
441; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
442; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
443; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
444; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
445; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
446; GCN-HSA-NEXT:    s_endpgm
447;
448; GCN-NOHSA-VI-LABEL: global_load_v8i16:
449; GCN-NOHSA-VI:       ; %bb.0: ; %entry
450; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
451; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
452; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
453; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
454; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
455; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
456; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
457; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
458; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
459; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
460; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
461; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
462; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
463; GCN-NOHSA-VI-NEXT:    s_endpgm
464;
465; EG-LABEL: global_load_v8i16:
466; EG:       ; %bb.0: ; %entry
467; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
468; EG-NEXT:    TEX 0 @6
469; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
470; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
471; EG-NEXT:    CF_END
472; EG-NEXT:    PAD
473; EG-NEXT:    Fetch clause starting at 6:
474; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
475; EG-NEXT:    ALU clause starting at 8:
476; EG-NEXT:     MOV * T0.X, KC0[2].Z,
477; EG-NEXT:    ALU clause starting at 9:
478; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
479; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
480;
481; CM-LABEL: global_load_v8i16:
482; CM:       ; %bb.0: ; %entry
483; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
484; CM-NEXT:    TEX 0 @6
485; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
486; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
487; CM-NEXT:    CF_END
488; CM-NEXT:    PAD
489; CM-NEXT:    Fetch clause starting at 6:
490; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
491; CM-NEXT:    ALU clause starting at 8:
492; CM-NEXT:     MOV * T0.X, KC0[2].Z,
493; CM-NEXT:    ALU clause starting at 9:
494; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
495; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
496entry:
497  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
498  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
499  ret void
500}
501
; Copies one <16 x i16> from %in to %out, both addrspace(1) pointers.
; The checks below expect the 32-byte vector to be moved as two 128-bit halves
; at byte offsets 0 and 16: paired buffer_load_dwordx4 / buffer_store_dwordx4
; with offset:16 on the non-HSA GCN runs, paired flat_load_dwordx4 /
; flat_store_dwordx4 with the +16 addresses formed by s_add_u32 / s_addc_u32
; on the HSA run, and two VTX_READ_128 fetches each followed by its own
; MEM_RAT_CACHELESS store on the r600 runs (prefixes EG and CM).
502define amdgpu_kernel void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
503; GCN-NOHSA-SI-LABEL: global_load_v16i16:
504; GCN-NOHSA-SI:       ; %bb.0: ; %entry
505; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
506; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
507; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
508; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
509; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
510; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
511; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
512; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
513; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
514; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
515; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
516; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
517; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
518; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
519; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
520; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
521; GCN-NOHSA-SI-NEXT:    s_endpgm
522;
523; GCN-HSA-LABEL: global_load_v16i16:
524; GCN-HSA:       ; %bb.0: ; %entry
525; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
526; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
527; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
528; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
529; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
530; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
531; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
532; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
533; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
534; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
535; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
536; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
537; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
538; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
539; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
540; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
541; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
542; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
543; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
544; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
545; GCN-HSA-NEXT:    s_endpgm
546;
547; GCN-NOHSA-VI-LABEL: global_load_v16i16:
548; GCN-NOHSA-VI:       ; %bb.0: ; %entry
549; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
550; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
551; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
552; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
553; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
554; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
555; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
556; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
557; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
558; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
559; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
560; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
561; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
562; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
563; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
564; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
565; GCN-NOHSA-VI-NEXT:    s_endpgm
566;
567; EG-LABEL: global_load_v16i16:
568; EG:       ; %bb.0: ; %entry
569; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
570; EG-NEXT:    TEX 0 @8
571; EG-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
572; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
573; EG-NEXT:    TEX 0 @10
574; EG-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
575; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
576; EG-NEXT:    CF_END
577; EG-NEXT:    Fetch clause starting at 8:
578; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 0, #1
579; EG-NEXT:    Fetch clause starting at 10:
580; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 16, #1
581; EG-NEXT:    ALU clause starting at 12:
582; EG-NEXT:     MOV * T0.X, KC0[2].Z,
583; EG-NEXT:    ALU clause starting at 13:
584; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
585; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
586; EG-NEXT:    ALU clause starting at 15:
587; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
588; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
589; EG-NEXT:     LSHR * T1.X, PV.W, literal.x,
590; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
591;
592; CM-LABEL: global_load_v16i16:
593; CM:       ; %bb.0: ; %entry
594; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
595; CM-NEXT:    TEX 0 @8
596; CM-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
597; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
598; CM-NEXT:    TEX 0 @10
599; CM-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
600; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
601; CM-NEXT:    CF_END
602; CM-NEXT:    Fetch clause starting at 8:
603; CM-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 0, #1
604; CM-NEXT:    Fetch clause starting at 10:
605; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 16, #1
606; CM-NEXT:    ALU clause starting at 12:
607; CM-NEXT:     MOV * T0.X, KC0[2].Z,
608; CM-NEXT:    ALU clause starting at 13:
609; CM-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
610; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
611; CM-NEXT:    ALU clause starting at 15:
612; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
613; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
614; CM-NEXT:     LSHR * T1.X, PV.W, literal.x,
615; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
616entry:
617  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
618  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
619  ret void
620}
621
622define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
623; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2:
624; GCN-NOHSA-SI:       ; %bb.0: ; %entry
625; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
626; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
627; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
628; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
629; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
630; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
631; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
632; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
633; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
634; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
635; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
636; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:2
637; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[4:7], 0 offset:4
638; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:6
639; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[4:7], 0 offset:8
640; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[4:7], 0 offset:10
641; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[4:7], 0 offset:12
642; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[4:7], 0 offset:14
643; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[4:7], 0 offset:16
644; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[4:7], 0 offset:18
645; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[4:7], 0 offset:20
646; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[4:7], 0 offset:22
647; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[4:7], 0 offset:24
648; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[4:7], 0 offset:26
649; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[4:7], 0 offset:28
650; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[4:7], 0 offset:30
651; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
652; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
653; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
654; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
655; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
656; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
657; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
658; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
659; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
660; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
661; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
662; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
663; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
664; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
665; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
666; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
667; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
668; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
669; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
670; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
671; GCN-NOHSA-SI-NEXT:    s_endpgm
672;
673; GCN-HSA-LABEL: global_load_v16i16_align2:
674; GCN-HSA:       ; %bb.0: ; %entry
675; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
676; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
677; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
678; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
679; GCN-HSA-NEXT:    s_add_u32 s0, s0, 16
680; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
681; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
682; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
683; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
684; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
685; GCN-HSA-NEXT:    s_add_u32 s0, s2, 16
686; GCN-HSA-NEXT:    s_addc_u32 s1, s3, 0
687; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
688; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
689; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
690; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
691; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
692; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
693; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
694; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
695; GCN-HSA-NEXT:    s_endpgm
696;
697; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2:
698; GCN-NOHSA-VI:       ; %bb.0: ; %entry
699; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
700; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
701; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
702; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
703; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
704; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
705; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 offset:14
706; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:10
707; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:6
708; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[4:7], 0 offset:2
709; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[4:7], 0 offset:30
710; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[4:7], 0 offset:26
711; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[4:7], 0 offset:22
712; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[4:7], 0 offset:18
713; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[4:7], 0 offset:12
714; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[4:7], 0 offset:8
715; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[4:7], 0 offset:4
716; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[4:7], 0
717; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[4:7], 0 offset:28
718; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[4:7], 0 offset:24
719; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[4:7], 0 offset:20
720; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[4:7], 0 offset:16
721; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s2
722; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s3
723; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
724; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
725; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
726; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(13)
727; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v2
728; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
729; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v3
730; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(11)
731; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
732; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
733; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
734; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(9)
735; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v6
736; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
737; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v19, 16, v7
738; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(7)
739; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v8, v0
740; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
741; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v9, v1
742; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
743; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v10, v16
744; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
745; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v11, v17
746; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
747; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v12, v4
748; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
749; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v13, v5
750; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
751; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v14, v18
752; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
753; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v15, v19
754; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
755; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
756; GCN-NOHSA-VI-NEXT:    s_endpgm
757;
758; EG-LABEL: global_load_v16i16_align2:
759; EG:       ; %bb.0: ; %entry
760; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
761; EG-NEXT:    TEX 1 @6
762; EG-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
763; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
764; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
765; EG-NEXT:    CF_END
766; EG-NEXT:    Fetch clause starting at 6:
767; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
768; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
769; EG-NEXT:    ALU clause starting at 10:
770; EG-NEXT:     MOV * T0.X, KC0[2].Y,
771; EG-NEXT:    ALU clause starting at 11:
772; EG-NEXT:     LSHR T2.X, KC0[2].Z, literal.x,
773; EG-NEXT:     ADD_INT * T2.W, KC0[2].Z, literal.y,
774; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
775; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
776; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
777;
778; CM-LABEL: global_load_v16i16_align2:
779; CM:       ; %bb.0: ; %entry
780; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
781; CM-NEXT:    TEX 1 @6
782; CM-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
783; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
784; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
785; CM-NEXT:    CF_END
786; CM-NEXT:    Fetch clause starting at 6:
787; CM-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
788; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
789; CM-NEXT:    ALU clause starting at 10:
790; CM-NEXT:     MOV * T0.X, KC0[2].Y,
791; CM-NEXT:    ALU clause starting at 11:
792; CM-NEXT:     ADD_INT * T2.W, KC0[2].Z, literal.x,
793; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
794; CM-NEXT:     LSHR * T2.X, PV.W, literal.x,
795; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
796; CM-NEXT:     LSHR * T3.X, KC0[2].Z, literal.x,
797; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
798entry:
799  %ld =  load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
800  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
801  ret void
802}
803
; Scalar zero-extending load: reads one i16 through the addrspace(1) pointer
; %in, zero-extends it to i32 and stores the result through %out.
; The GCN checks expect an unsigned 16-bit load (buffer_load_ushort or
; flat_load_ushort) feeding a dword store with no separate extend instruction;
; the EG/CM checks expect a VTX_READ_16 whose result is stored directly.
804define amdgpu_kernel void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
805; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32:
806; GCN-NOHSA-SI:       ; %bb.0:
807; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
808; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
809; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
810; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
811; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
812; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
813; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
814; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
815; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
816; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
817; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
818; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
819; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
820; GCN-NOHSA-SI-NEXT:    s_endpgm
821;
822; GCN-HSA-LABEL: global_zextload_i16_to_i32:
823; GCN-HSA:       ; %bb.0:
824; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
825; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
826; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
827; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
828; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
829; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
830; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
831; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
832; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
833; GCN-HSA-NEXT:    s_endpgm
834;
835; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32:
836; GCN-NOHSA-VI:       ; %bb.0:
837; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
838; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
839; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
840; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
841; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
842; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
843; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
844; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
845; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
846; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
847; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
848; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
849; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
850; GCN-NOHSA-VI-NEXT:    s_endpgm
851;
852; EG-LABEL: global_zextload_i16_to_i32:
853; EG:       ; %bb.0:
854; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
855; EG-NEXT:    TEX 0 @6
856; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
857; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
858; EG-NEXT:    CF_END
859; EG-NEXT:    PAD
860; EG-NEXT:    Fetch clause starting at 6:
861; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
862; EG-NEXT:    ALU clause starting at 8:
863; EG-NEXT:     MOV * T0.X, KC0[2].Z,
864; EG-NEXT:    ALU clause starting at 9:
865; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
866; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
867;
868; CM-LABEL: global_zextload_i16_to_i32:
869; CM:       ; %bb.0:
870; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
871; CM-NEXT:    TEX 0 @6
872; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
873; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
874; CM-NEXT:    CF_END
875; CM-NEXT:    PAD
876; CM-NEXT:    Fetch clause starting at 6:
877; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
878; CM-NEXT:    ALU clause starting at 8:
879; CM-NEXT:     MOV * T0.X, KC0[2].Z,
880; CM-NEXT:    ALU clause starting at 9:
881; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
882; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
883  %a = load i16, i16 addrspace(1)* %in
884  %ext = zext i16 %a to i32
885  store i32 %ext, i32 addrspace(1)* %out
886  ret void
887}
888
; Scalar sign-extending load: reads one i16 through the addrspace(1) pointer
; %in, sign-extends it to i32 and stores the result through %out.
; The GCN checks expect a signed 16-bit load (buffer_load_sshort or
; flat_load_sshort) feeding a dword store; the EG/CM checks expect a
; VTX_READ_16 followed by a BFE_INT whose operands are 0.0 and the literal 16.
889define amdgpu_kernel void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
890; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32:
891; GCN-NOHSA-SI:       ; %bb.0:
892; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
893; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
894; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
895; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
896; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
897; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
898; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
899; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
900; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
901; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
902; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
903; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
904; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
905; GCN-NOHSA-SI-NEXT:    s_endpgm
906;
907; GCN-HSA-LABEL: global_sextload_i16_to_i32:
908; GCN-HSA:       ; %bb.0:
909; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
910; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
911; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
912; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
913; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
914; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
915; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
916; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
917; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
918; GCN-HSA-NEXT:    s_endpgm
919;
920; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32:
921; GCN-NOHSA-VI:       ; %bb.0:
922; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
923; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
924; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
925; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
926; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
927; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
928; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
929; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
930; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
931; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
932; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
933; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
934; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
935; GCN-NOHSA-VI-NEXT:    s_endpgm
936;
937; EG-LABEL: global_sextload_i16_to_i32:
938; EG:       ; %bb.0:
939; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
940; EG-NEXT:    TEX 0 @6
941; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
942; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
943; EG-NEXT:    CF_END
944; EG-NEXT:    PAD
945; EG-NEXT:    Fetch clause starting at 6:
946; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
947; EG-NEXT:    ALU clause starting at 8:
948; EG-NEXT:     MOV * T0.X, KC0[2].Z,
949; EG-NEXT:    ALU clause starting at 9:
950; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
951; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
952; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
953;
954; CM-LABEL: global_sextload_i16_to_i32:
955; CM:       ; %bb.0:
956; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
957; CM-NEXT:    TEX 0 @6
958; CM-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
959; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
960; CM-NEXT:    CF_END
961; CM-NEXT:    PAD
962; CM-NEXT:    Fetch clause starting at 6:
963; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
964; CM-NEXT:    ALU clause starting at 8:
965; CM-NEXT:     MOV * T0.X, KC0[2].Z,
966; CM-NEXT:    ALU clause starting at 9:
967; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
968; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
969; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
970; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
971  %a = load i16, i16 addrspace(1)* %in
972  %ext = sext i16 %a to i32
973  store i32 %ext, i32 addrspace(1)* %out
974  ret void
975}
976
; Single-element vector zero-extending load: reads a <1 x i16> through the
; addrspace(1) pointer %in, zero-extends it to <1 x i32> and stores it
; through %out.
; The checks expect the one-element vector to be handled as a single 16-bit
; value: one buffer_load_ushort / flat_load_ushort (GCN) or VTX_READ_16
; (EG/CM), followed by a single dword store.
977define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
978; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32:
979; GCN-NOHSA-SI:       ; %bb.0:
980; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
981; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
982; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
983; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
984; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
985; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
986; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
987; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
988; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
989; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
990; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
991; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
992; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
993; GCN-NOHSA-SI-NEXT:    s_endpgm
994;
995; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32:
996; GCN-HSA:       ; %bb.0:
997; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
998; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
999; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1000; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1001; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
1002; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1003; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1004; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1005; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
1006; GCN-HSA-NEXT:    s_endpgm
1007;
1008; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32:
1009; GCN-NOHSA-VI:       ; %bb.0:
1010; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1011; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1012; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1013; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1014; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1015; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1016; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1017; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1018; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1019; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1020; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1021; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1022; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1023; GCN-NOHSA-VI-NEXT:    s_endpgm
1024;
1025; EG-LABEL: global_zextload_v1i16_to_v1i32:
1026; EG:       ; %bb.0:
1027; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1028; EG-NEXT:    TEX 0 @6
1029; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
1030; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1031; EG-NEXT:    CF_END
1032; EG-NEXT:    PAD
1033; EG-NEXT:    Fetch clause starting at 6:
1034; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1035; EG-NEXT:    ALU clause starting at 8:
1036; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1037; EG-NEXT:    ALU clause starting at 9:
1038; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1039; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1040;
1041; CM-LABEL: global_zextload_v1i16_to_v1i32:
1042; CM:       ; %bb.0:
1043; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1044; CM-NEXT:    TEX 0 @6
1045; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
1046; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1047; CM-NEXT:    CF_END
1048; CM-NEXT:    PAD
1049; CM-NEXT:    Fetch clause starting at 6:
1050; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1051; CM-NEXT:    ALU clause starting at 8:
1052; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1053; CM-NEXT:    ALU clause starting at 9:
1054; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1055; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1056  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
1057  %ext = zext <1 x i16> %load to <1 x i32>
1058  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
1059  ret void
1060}
1061
; Single-element vector sign-extending load: reads a <1 x i16> through the
; addrspace(1) pointer %in, sign-extends it to <1 x i32> and stores it
; through %out.
; The checks expect the one-element vector to be handled as a single 16-bit
; value: one buffer_load_sshort / flat_load_sshort on GCN, or a VTX_READ_16
; followed by a BFE_INT (operands 0.0 and the literal 16) on EG/CM, then a
; single dword store.
1062define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
1063; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32:
1064; GCN-NOHSA-SI:       ; %bb.0:
1065; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1066; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1067; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1068; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1069; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1070; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1071; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1072; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1073; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
1074; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1075; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1076; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1077; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1078; GCN-NOHSA-SI-NEXT:    s_endpgm
1079;
1080; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32:
1081; GCN-HSA:       ; %bb.0:
1082; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1083; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1084; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1085; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1086; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
1087; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1088; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1089; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1090; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
1091; GCN-HSA-NEXT:    s_endpgm
1092;
1093; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32:
1094; GCN-NOHSA-VI:       ; %bb.0:
1095; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1096; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1097; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1098; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1099; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1100; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1101; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1102; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1103; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
1104; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1105; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1106; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1107; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1108; GCN-NOHSA-VI-NEXT:    s_endpgm
1109;
1110; EG-LABEL: global_sextload_v1i16_to_v1i32:
1111; EG:       ; %bb.0:
1112; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1113; EG-NEXT:    TEX 0 @6
1114; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
1115; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1116; EG-NEXT:    CF_END
1117; EG-NEXT:    PAD
1118; EG-NEXT:    Fetch clause starting at 6:
1119; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1120; EG-NEXT:    ALU clause starting at 8:
1121; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1122; EG-NEXT:    ALU clause starting at 9:
1123; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1124; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1125; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1126;
1127; CM-LABEL: global_sextload_v1i16_to_v1i32:
1128; CM:       ; %bb.0:
1129; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1130; CM-NEXT:    TEX 0 @6
1131; CM-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
1132; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1133; CM-NEXT:    CF_END
1134; CM-NEXT:    PAD
1135; CM-NEXT:    Fetch clause starting at 6:
1136; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1137; CM-NEXT:    ALU clause starting at 8:
1138; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1139; CM-NEXT:    ALU clause starting at 9:
1140; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
1141; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1142; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1143; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1144  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
1145  %ext = sext <1 x i16> %load to <1 x i32>
1146  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
1147  ret void
1148}
1149
; Two-element vector zero-extending load: reads a <2 x i16> through the
; addrspace(1) pointer %in, zero-extends it to <2 x i32> and stores it
; through %out.
; The checks expect both elements to be fetched with one 32-bit load
; (buffer_load_dword / flat_load_dword / VTX_READ_32) and then split: a
; logical shift right by 16 produces the second element and an AND with
; 0xffff (65535) produces the first, before a single two-dword store.
1150define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
1151; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32:
1152; GCN-NOHSA-SI:       ; %bb.0:
1153; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1154; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1155; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1156; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1157; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1158; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1159; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1160; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1161; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1162; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1163; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1164; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1165; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1166; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1167; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1168; GCN-NOHSA-SI-NEXT:    s_endpgm
1169;
1170; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32:
1171; GCN-HSA:       ; %bb.0:
1172; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1173; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1174; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1175; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1176; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
1177; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1178; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1179; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1180; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
1181; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1182; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1183; GCN-HSA-NEXT:    s_endpgm
1184;
1185; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32:
1186; GCN-NOHSA-VI:       ; %bb.0:
1187; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1188; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1189; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1190; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1191; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1192; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1193; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1194; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1195; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1196; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1197; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1198; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1199; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1200; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1201; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1202; GCN-NOHSA-VI-NEXT:    s_endpgm
1203;
1204; EG-LABEL: global_zextload_v2i16_to_v2i32:
1205; EG:       ; %bb.0:
1206; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1207; EG-NEXT:    TEX 0 @6
1208; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
1209; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
1210; EG-NEXT:    CF_END
1211; EG-NEXT:    PAD
1212; EG-NEXT:    Fetch clause starting at 6:
1213; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1214; EG-NEXT:    ALU clause starting at 8:
1215; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1216; EG-NEXT:    ALU clause starting at 9:
1217; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
1218; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1219; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
1220; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
1221; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1222;
1223; CM-LABEL: global_zextload_v2i16_to_v2i32:
1224; CM:       ; %bb.0:
1225; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1226; CM-NEXT:    TEX 0 @6
1227; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1228; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
1229; CM-NEXT:    CF_END
1230; CM-NEXT:    PAD
1231; CM-NEXT:    Fetch clause starting at 6:
1232; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1233; CM-NEXT:    ALU clause starting at 8:
1234; CM-NEXT:     MOV * T4.X, KC0[2].Z,
1235; CM-NEXT:    ALU clause starting at 9:
1236; CM-NEXT:     LSHR * T4.Y, T4.X, literal.x,
1237; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1238; CM-NEXT:     AND_INT * T4.X, T4.X, literal.x,
1239; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1240; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
1241; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1242  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
1243  %ext = zext <2 x i16> %load to <2 x i32>
1244  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1245  ret void
1246}
1247
1248; TODO: On r600 (the EG/CM checks) this should use ASHR instead of LSHR + BFE_INT for the high element.
; Two-element vector sign-extending load: reads a <2 x i16> through the
; addrspace(1) pointer %in, sign-extends it to <2 x i32> and stores it
; through %out.
; The checks expect both elements to be fetched with one 32-bit load and
; then split. On GCN the second element comes from an arithmetic shift right
; by 16 (v_ashrrev_i32) and the first from v_bfe_i32 with operands 0, 16.
; On EG/CM the first element is a BFE_INT of the loaded value, while the
; second is currently an LSHR by 16 followed by another BFE_INT.
1249define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
1250; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32:
1251; GCN-NOHSA-SI:       ; %bb.0:
1252; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1253; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1254; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1255; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1256; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1257; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1258; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1259; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1260; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1261; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1262; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1263; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1264; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
1265; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
1266; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1267; GCN-NOHSA-SI-NEXT:    s_endpgm
1268;
1269; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32:
1270; GCN-HSA:       ; %bb.0:
1271; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1272; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1273; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1274; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1275; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
1276; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1277; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1278; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1279; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v2
1280; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
1281; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1282; GCN-HSA-NEXT:    s_endpgm
1283;
1284; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32:
1285; GCN-NOHSA-VI:       ; %bb.0:
1286; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1287; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1288; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1289; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1290; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1291; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1292; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1293; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1294; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1295; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1296; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1297; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1298; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
1299; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
1300; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1301; GCN-NOHSA-VI-NEXT:    s_endpgm
1302;
1303; EG-LABEL: global_sextload_v2i16_to_v2i32:
1304; EG:       ; %bb.0:
1305; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1306; EG-NEXT:    TEX 0 @6
1307; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1308; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1309; EG-NEXT:    CF_END
1310; EG-NEXT:    PAD
1311; EG-NEXT:    Fetch clause starting at 6:
1312; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1313; EG-NEXT:    ALU clause starting at 8:
1314; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1315; EG-NEXT:    ALU clause starting at 9:
1316; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1317; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1318; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1319; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1320; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1321; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1322;
1323; CM-LABEL: global_sextload_v2i16_to_v2i32:
1324; CM:       ; %bb.0:
1325; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1326; CM-NEXT:    TEX 0 @6
1327; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1328; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
1329; CM-NEXT:    CF_END
1330; CM-NEXT:    PAD
1331; CM-NEXT:    Fetch clause starting at 6:
1332; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1333; CM-NEXT:    ALU clause starting at 8:
1334; CM-NEXT:     MOV * T4.X, KC0[2].Z,
1335; CM-NEXT:    ALU clause starting at 9:
1336; CM-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1337; CM-NEXT:     LSHR * T0.W, T4.X, literal.x,
1338; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1339; CM-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
1340; CM-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.y,
1341; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1342  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
1343  %ext = sext <2 x i16> %load to <2 x i32>
1344  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1345  ret void
1346}
1347
; Three-element vector zero-extending load: reads a <3 x i16> through the
; addrspace(1) pointer %in, zero-extends it to <3 x i32> and stores it
; through %out.
; On GCN the checks expect a single two-dword load whose halves are split
; with a shift right by 16 and AND 0xffff masks. The store shape differs per
; configuration: the SI checks expect a dword store at offset 8 plus a
; dwordx2 store, while the HSA and VI checks expect one dwordx3 store.
; On EG/CM the checks expect three separate VTX_READ_16 fetches (offsets 0,
; 2 and 4) and two MEM_RAT_CACHELESS stores.
1348define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
1349; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32:
1350; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1351; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1352; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1353; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1354; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1355; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1356; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1357; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1358; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1359; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1360; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1361; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1362; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1363; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1364; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v0
1365; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v1
1366; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:8
1367; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1368; GCN-NOHSA-SI-NEXT:    s_endpgm
1369;
1370; GCN-HSA-LABEL: global_zextload_v3i16_to_v3i32:
1371; GCN-HSA:       ; %bb.0: ; %entry
1372; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1373; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1374; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1375; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1376; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1377; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1378; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1379; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1380; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
1381; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v4
1382; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v3
1383; GCN-HSA-NEXT:    flat_store_dwordx3 v[5:6], v[0:2]
1384; GCN-HSA-NEXT:    s_endpgm
1385;
1386; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32:
1387; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1388; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1389; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1390; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1391; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1392; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1393; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1394; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1395; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1396; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1397; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1398; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1399; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1400; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v1
1401; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1402; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1403; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1404; GCN-NOHSA-VI-NEXT:    s_endpgm
1405;
1406; EG-LABEL: global_zextload_v3i16_to_v3i32:
1407; EG:       ; %bb.0: ; %entry
1408; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1409; EG-NEXT:    TEX 2 @6
1410; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1411; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1412; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1413; EG-NEXT:    CF_END
1414; EG-NEXT:    Fetch clause starting at 6:
1415; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1416; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1417; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1418; EG-NEXT:    ALU clause starting at 12:
1419; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1420; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1421; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1422; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1423; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1424; EG-NEXT:    ALU clause starting at 17:
1425; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1426; EG-NEXT:     MOV * T3.Y, T1.X,
1427; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1428;
1429; CM-LABEL: global_zextload_v3i16_to_v3i32:
1430; CM:       ; %bb.0: ; %entry
1431; CM-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1432; CM-NEXT:    TEX 2 @6
1433; CM-NEXT:    ALU 2, @17, KC0[CB0:0-32], KC1[]
1434; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T3, T4.X
1435; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
1436; CM-NEXT:    CF_END
1437; CM-NEXT:    Fetch clause starting at 6:
1438; CM-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1439; CM-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1440; CM-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1441; CM-NEXT:    ALU clause starting at 12:
1442; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1443; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1444; CM-NEXT:     LSHR * T0.X, PV.W, literal.x,
1445; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1446; CM-NEXT:     MOV * T1.X, KC0[2].Z,
1447; CM-NEXT:    ALU clause starting at 17:
1448; CM-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
1449; CM-NEXT:     MOV * T3.Y, T1.X,
1450; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1451entry:
1452  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
1453  %ext = zext <3 x i16> %ld to <3 x i32>
1454  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1455  ret void
1456}
1457
; Kernel under test: loads a <3 x i16> vector through the addrspace(1) pointer
; %in, sign-extends every lane to i32 and stores the resulting <3 x i32>
; through the addrspace(1) pointer %out.
; The check lines inside the body are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand. There is one group of check lines per
; check prefix used by the llc invocations in the file header
; (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM).
1458define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
1459; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32:
1460; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1461; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1462; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1463; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1464; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1465; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1466; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1467; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1468; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1469; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1470; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1471; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1472; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1473; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v0
1474; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v0, 0, 16
1475; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v1, 0, 16
1476; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:8
1477; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1478; GCN-NOHSA-SI-NEXT:    s_endpgm
1479;
1480; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32:
1481; GCN-HSA:       ; %bb.0: ; %entry
1482; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1483; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1484; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1485; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1486; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1487; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1488; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1489; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1490; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1491; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
1492; GCN-HSA-NEXT:    v_bfe_i32 v0, v3, 0, 16
1493; GCN-HSA-NEXT:    flat_store_dwordx3 v[5:6], v[0:2]
1494; GCN-HSA-NEXT:    s_endpgm
1495;
1496; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32:
1497; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1498; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1499; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1500; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1501; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1502; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1503; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1504; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1505; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1506; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1507; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1508; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1509; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1510; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1511; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v4, 0, 16
1512; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v3, 0, 16
1513; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1514; GCN-NOHSA-VI-NEXT:    s_endpgm
1515;
1516; EG-LABEL: global_sextload_v3i16_to_v3i32:
1517; EG:       ; %bb.0: ; %entry
1518; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1519; EG-NEXT:    TEX 2 @6
1520; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1521; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1522; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1523; EG-NEXT:    CF_END
1524; EG-NEXT:    Fetch clause starting at 6:
1525; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1526; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1527; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1528; EG-NEXT:    ALU clause starting at 12:
1529; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1530; EG-NEXT:    ALU clause starting at 13:
1531; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1532; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1533; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1534; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1535; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1536; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1537; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1538; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1539; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1540; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1541;
1542; CM-LABEL: global_sextload_v3i16_to_v3i32:
1543; CM:       ; %bb.0: ; %entry
1544; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1545; CM-NEXT:    TEX 2 @6
1546; CM-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1547; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
1548; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X
1549; CM-NEXT:    CF_END
1550; CM-NEXT:    Fetch clause starting at 6:
1551; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 4, #1
1552; CM-NEXT:     VTX_READ_16 T2.X, T0.X, 0, #1
1553; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 2, #1
1554; CM-NEXT:    ALU clause starting at 12:
1555; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1556; CM-NEXT:    ALU clause starting at 13:
1557; CM-NEXT:     BFE_INT T1.X, T1.X, 0.0, literal.x,
1558; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1559; CM-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1560; CM-NEXT:     LSHR T3.X, PV.W, literal.x,
1561; CM-NEXT:     BFE_INT * T0.Y, T0.X, 0.0, literal.y,
1562; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1563; CM-NEXT:     BFE_INT * T0.X, T2.X, 0.0, literal.x,
1564; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1565; CM-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
1566; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. The block label "entry" is echoed in the "%bb.0" check lines
; above, so keep the label name in sync with them.
1567entry:
1568  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
1569  %ext = sext <3 x i16> %ld to <3 x i32>
1570  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1571  ret void
1572}
1573
1574; TODO: This should use DST, but for some there are redundant MOVs
; Kernel under test: loads a <4 x i16> vector through the addrspace(1) pointer
; %in, zero-extends every lane to i32 and stores the resulting <4 x i32>
; through the addrspace(1) pointer %out.
; The check lines inside the body are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand. There is one group of check lines per
; check prefix used by the llc invocations in the file header
; (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM).
1575define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
1576; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32:
1577; GCN-NOHSA-SI:       ; %bb.0:
1578; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1579; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1580; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1581; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1582; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1583; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1584; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1585; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1586; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1587; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1588; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1589; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1590; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
1591; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
1592; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v5
1593; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v4
1594; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1595; GCN-NOHSA-SI-NEXT:    s_endpgm
1596;
1597; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32:
1598; GCN-HSA:       ; %bb.0:
1599; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1600; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1601; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1602; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1603; GCN-HSA-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
1604; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s0
1605; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s1
1606; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1607; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
1608; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
1609; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v5
1610; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v4
1611; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
1612; GCN-HSA-NEXT:    s_endpgm
1613;
1614; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32:
1615; GCN-NOHSA-VI:       ; %bb.0:
1616; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1617; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1618; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1619; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1620; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1621; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1622; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1623; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1624; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1625; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1626; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1627; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1628; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1629; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v1
1630; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1631; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1632; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1633; GCN-NOHSA-VI-NEXT:    s_endpgm
1634;
1635; EG-LABEL: global_zextload_v4i16_to_v4i32:
1636; EG:       ; %bb.0:
1637; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1638; EG-NEXT:    TEX 0 @6
1639; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
1640; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1641; EG-NEXT:    CF_END
1642; EG-NEXT:    PAD
1643; EG-NEXT:    Fetch clause starting at 6:
1644; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1645; EG-NEXT:    ALU clause starting at 8:
1646; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1647; EG-NEXT:    ALU clause starting at 9:
1648; EG-NEXT:     MOV T2.X, T5.X,
1649; EG-NEXT:     MOV * T3.X, T5.Y,
1650; EG-NEXT:     MOV T0.Y, PV.X,
1651; EG-NEXT:     MOV * T0.Z, PS,
1652; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1653; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1654; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1655; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1656; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1657; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1658; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
1659; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1660; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1661;
1662; CM-LABEL: global_zextload_v4i16_to_v4i32:
1663; CM:       ; %bb.0:
1664; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1665; CM-NEXT:    TEX 0 @6
1666; CM-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
1667; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
1668; CM-NEXT:    CF_END
1669; CM-NEXT:    PAD
1670; CM-NEXT:    Fetch clause starting at 6:
1671; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1672; CM-NEXT:    ALU clause starting at 8:
1673; CM-NEXT:     MOV * T5.X, KC0[2].Z,
1674; CM-NEXT:    ALU clause starting at 9:
1675; CM-NEXT:     MOV * T2.X, T5.X,
1676; CM-NEXT:     MOV T3.X, T5.Y,
1677; CM-NEXT:     MOV * T0.Y, PV.X,
1678; CM-NEXT:     MOV * T0.Z, PV.X,
1679; CM-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1680; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1681; CM-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1682; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1683; CM-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1684; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1685; CM-NEXT:     AND_INT * T5.X, T0.Y, literal.x,
1686; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1687; CM-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
1688; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, per-lane zext from i16 to i32, vector store.
1689  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
1690  %ext = zext <4 x i16> %load to <4 x i32>
1691  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1692  ret void
1693}
1694
1695; TODO: We should use ASHR instead of LSHR + BFE
1696; TODO: This should use DST, but for some there are redundant MOVs
; Kernel under test: loads a <4 x i16> vector through the addrspace(1) pointer
; %in, sign-extends every lane to i32 and stores the resulting <4 x i32>
; through the addrspace(1) pointer %out.
; The check lines inside the body are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand. There is one group of check lines per
; check prefix used by the llc invocations in the file header
; (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM).
1697define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
1698; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32:
1699; GCN-NOHSA-SI:       ; %bb.0:
1700; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1701; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1702; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1703; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1704; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1705; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1706; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1707; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1708; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1709; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1710; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1711; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1712; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1713; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[5:6], v[3:4], 48
1714; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v4, 0, 16
1715; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
1716; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v5
1717; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1718; GCN-NOHSA-SI-NEXT:    s_endpgm
1719;
1720; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32:
1721; GCN-HSA:       ; %bb.0:
1722; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1723; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1724; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1725; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1726; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1727; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1728; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1729; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1730; GCN-HSA-NEXT:    v_ashr_i64 v[7:8], v[3:4], 48
1731; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1732; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
1733; GCN-HSA-NEXT:    v_bfe_i32 v0, v3, 0, 16
1734; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v7
1735; GCN-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[0:3]
1736; GCN-HSA-NEXT:    s_endpgm
1737;
1738; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32:
1739; GCN-NOHSA-VI:       ; %bb.0:
1740; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1741; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1742; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1743; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1744; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1745; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1746; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1747; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1748; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1749; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1750; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1751; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1752; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
1753; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
1754; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v5, 0, 16
1755; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v4, 0, 16
1756; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1757; GCN-NOHSA-VI-NEXT:    s_endpgm
1758;
1759; EG-LABEL: global_sextload_v4i16_to_v4i32:
1760; EG:       ; %bb.0:
1761; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1762; EG-NEXT:    TEX 0 @6
1763; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1764; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1765; EG-NEXT:    CF_END
1766; EG-NEXT:    PAD
1767; EG-NEXT:    Fetch clause starting at 6:
1768; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1769; EG-NEXT:    ALU clause starting at 8:
1770; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1771; EG-NEXT:    ALU clause starting at 9:
1772; EG-NEXT:     MOV T2.X, T5.X,
1773; EG-NEXT:     MOV * T3.X, T5.Y,
1774; EG-NEXT:     MOV T0.Y, PV.X,
1775; EG-NEXT:     MOV * T0.Z, PS,
1776; EG-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1777; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1778; EG-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1779; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1780; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1781; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1782; EG-NEXT:     LSHR * T0.W, T0.Y, literal.x,
1783; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1784; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1785; EG-NEXT:     BFE_INT * T5.Y, PS, 0.0, literal.y,
1786; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1787;
1788; CM-LABEL: global_sextload_v4i16_to_v4i32:
1789; CM:       ; %bb.0:
1790; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1791; CM-NEXT:    TEX 0 @6
1792; CM-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1793; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
1794; CM-NEXT:    CF_END
1795; CM-NEXT:    PAD
1796; CM-NEXT:    Fetch clause starting at 6:
1797; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1798; CM-NEXT:    ALU clause starting at 8:
1799; CM-NEXT:     MOV * T5.X, KC0[2].Z,
1800; CM-NEXT:    ALU clause starting at 9:
1801; CM-NEXT:     MOV * T2.X, T5.X,
1802; CM-NEXT:     MOV T3.X, T5.Y,
1803; CM-NEXT:     MOV * T0.Y, PV.X,
1804; CM-NEXT:     MOV * T0.Z, PV.X,
1805; CM-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1806; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1807; CM-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1808; CM-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1809; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1810; CM-NEXT:     LSHR T0.Z, T0.Y, literal.x,
1811; CM-NEXT:     BFE_INT * T5.W, PV.W, 0.0, literal.x,
1812; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1813; CM-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1814; CM-NEXT:     BFE_INT * T5.Y, PV.Z, 0.0, literal.y,
1815; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test: vector load, per-lane sext from i16 to i32, vector store.
1816  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
1817  %ext = sext <4 x i16> %load to <4 x i32>
1818  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1819  ret void
1820}
1821
1822; TODO: These should use LSHR instead of BFE_UINT
; Kernel under test: loads an <8 x i16> vector through the addrspace(1) pointer
; %in, zero-extends every lane to i32 and stores the resulting <8 x i32>
; through the addrspace(1) pointer %out.
; The check lines inside the body are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand. There is one group of check lines per
; check prefix used by the llc invocations in the file header
; (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM).
1823define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
1824; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32:
1825; GCN-NOHSA-SI:       ; %bb.0:
1826; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1827; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1828; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1829; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1830; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1831; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1832; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1833; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1834; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1835; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1836; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1837; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1838; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1839; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1840; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1841; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1842; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, 0xffff, v1
1843; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v0
1844; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v3
1845; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v2
1846; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1847; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1848; GCN-NOHSA-SI-NEXT:    s_endpgm
1849;
1850; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32:
1851; GCN-HSA:       ; %bb.0:
1852; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1853; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1854; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1855; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1856; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1857; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1858; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1859; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
1860; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
1861; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
1862; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
1863; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1864; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1865; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1866; GCN-HSA-NEXT:    v_and_b32_e32 v10, 0xffff, v3
1867; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v2
1868; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1869; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1870; GCN-HSA-NEXT:    v_and_b32_e32 v6, 0xffff, v1
1871; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v0
1872; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
1873; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
1874; GCN-HSA-NEXT:    s_endpgm
1875;
1876; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32:
1877; GCN-NOHSA-VI:       ; %bb.0:
1878; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
1879; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
1880; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
1881; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
1882; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
1883; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1884; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
1885; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
1886; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1887; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
1888; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
1889; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1890; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1891; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, 0xffff, v3
1892; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1893; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v2
1894; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1895; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, 0xffff, v1
1896; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1897; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v0
1898; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1899; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1900; GCN-NOHSA-VI-NEXT:    s_endpgm
1901;
1902; EG-LABEL: global_zextload_v8i16_to_v8i32:
1903; EG:       ; %bb.0:
1904; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1905; EG-NEXT:    TEX 0 @6
1906; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1907; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1908; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1909; EG-NEXT:    CF_END
1910; EG-NEXT:    Fetch clause starting at 6:
1911; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1912; EG-NEXT:    ALU clause starting at 8:
1913; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1914; EG-NEXT:    ALU clause starting at 9:
1915; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1916; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1917; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1918; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1919; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1920; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1921; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1922; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1923; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1924; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1925; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1926; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1927; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1928; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1929; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1930; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1931; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1932; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1933;
1934; CM-LABEL: global_zextload_v8i16_to_v8i32:
1935; CM:       ; %bb.0:
1936; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1937; CM-NEXT:    TEX 0 @6
1938; CM-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1939; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T10.X
1940; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T9.X
1941; CM-NEXT:    CF_END
1942; CM-NEXT:    Fetch clause starting at 6:
1943; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1944; CM-NEXT:    ALU clause starting at 8:
1945; CM-NEXT:     MOV * T7.X, KC0[2].Z,
1946; CM-NEXT:    ALU clause starting at 9:
1947; CM-NEXT:     LSHR * T8.W, T7.W, literal.x,
1948; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1949; CM-NEXT:     AND_INT * T8.Z, T7.W, literal.x,
1950; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1951; CM-NEXT:     LSHR T8.Y, T7.Z, literal.x,
1952; CM-NEXT:     LSHR * T7.W, T7.Y, literal.x,
1953; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1954; CM-NEXT:     AND_INT T8.X, T7.Z, literal.x,
1955; CM-NEXT:     AND_INT T7.Z, T7.Y, literal.x,
1956; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1957; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1958; CM-NEXT:     LSHR T9.X, PV.W, literal.x,
1959; CM-NEXT:     LSHR * T7.Y, T7.X, literal.y,
1960; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1961; CM-NEXT:     AND_INT * T7.X, T7.X, literal.x,
1962; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1963; CM-NEXT:     LSHR * T10.X, KC0[2].Y, literal.x,
1964; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: vector load, per-lane zext from i16 to i32, vector store.
1965  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
1966  %ext = zext <8 x i16> %load to <8 x i32>
1967  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1968  ret void
1969}
1970
1971; TODO: These should use ASHR instead of LSHR + BFE_INT
; Kernel under test: loads an <8 x i16> vector through the addrspace(1) pointer
; %in, sign-extends every lane to i32 and stores the resulting <8 x i32>
; through the addrspace(1) pointer %out.
; The check lines inside the body are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand. There is one group of check lines per
; check prefix used by the llc invocations in the file header
; (GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM).
1972define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
1973; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32:
1974; GCN-NOHSA-SI:       ; %bb.0:
1975; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1976; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1977; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1978; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1979; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1980; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1981; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1982; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1983; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1984; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1985; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1986; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1987; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
1988; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
1989; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v1, 0, 16
1990; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v0, 0, 16
1991; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
1992; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
1993; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v3, 0, 16
1994; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v2, 0, 16
1995; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1996; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1997; GCN-NOHSA-SI-NEXT:    s_endpgm
1998;
1999; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32:
2000; GCN-HSA:       ; %bb.0:
2001; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2002; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2003; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2004; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2005; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2006; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2007; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2008; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
2009; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
2010; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
2011; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
2012; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
2013; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
2014; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
2015; GCN-HSA-NEXT:    v_bfe_i32 v10, v3, 0, 16
2016; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
2017; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
2018; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
2019; GCN-HSA-NEXT:    v_bfe_i32 v6, v1, 0, 16
2020; GCN-HSA-NEXT:    v_bfe_i32 v4, v0, 0, 16
2021; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
2022; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
2023; GCN-HSA-NEXT:    s_endpgm
2024;
2025; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32:
2026; GCN-NOHSA-VI:       ; %bb.0:
2027; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
2028; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
2029; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
2030; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
2031; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
2032; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2033; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
2034; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
2035; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2036; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
2037; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
2038; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2039; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
2040; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
2041; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v3, 0, 16
2042; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v2, 0, 16
2043; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
2044; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
2045; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v1, 0, 16
2046; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v0, 0, 16
2047; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
2048; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2049; GCN-NOHSA-VI-NEXT:    s_endpgm
2050;
2051; EG-LABEL: global_sextload_v8i16_to_v8i32:
2052; EG:       ; %bb.0:
2053; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
2054; EG-NEXT:    TEX 0 @6
2055; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
2056; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
2057; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
2058; EG-NEXT:    CF_END
2059; EG-NEXT:    Fetch clause starting at 6:
2060; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
2061; EG-NEXT:    ALU clause starting at 8:
2062; EG-NEXT:     MOV * T7.X, KC0[2].Z,
2063; EG-NEXT:    ALU clause starting at 9:
2064; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
2065; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2066; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
2067; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
2068; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
2069; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2070; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
2071; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
2072; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
2073; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
2074; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2075; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
2076; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
2077; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
2078; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
2079; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2080; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2081; EG-NEXT:     LSHR T10.X, PS, literal.x,
2082; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2083; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2084;
2085; CM-LABEL: global_sextload_v8i16_to_v8i32:
2086; CM:       ; %bb.0:
2087; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
2088; CM-NEXT:    TEX 0 @6
2089; CM-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
2090; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
2091; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
2092; CM-NEXT:    CF_END
2093; CM-NEXT:    Fetch clause starting at 6:
2094; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
2095; CM-NEXT:    ALU clause starting at 8:
2096; CM-NEXT:     MOV * T7.X, KC0[2].Z,
2097; CM-NEXT:    ALU clause starting at 9:
2098; CM-NEXT:     BFE_INT * T8.Z, T7.W, 0.0, literal.x,
2099; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2100; CM-NEXT:     BFE_INT T8.X, T7.Z, 0.0, literal.x,
2101; CM-NEXT:     LSHR T0.Y, T7.Y, literal.x,
2102; CM-NEXT:     BFE_INT T9.Z, T7.Y, 0.0, literal.x,
2103; CM-NEXT:     LSHR * T0.W, T7.W, literal.x,
2104; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2105; CM-NEXT:     BFE_INT T9.X, T7.X, 0.0, literal.x,
2106; CM-NEXT:     LSHR T1.Y, T7.Z, literal.x,
2107; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.x,
2108; CM-NEXT:     BFE_INT * T8.W, PV.W, 0.0, literal.x,
2109; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2110; CM-NEXT:     LSHR T10.X, PV.Z, literal.x,
2111; CM-NEXT:     BFE_INT T8.Y, PV.Y, 0.0, literal.y,
2112; CM-NEXT:     LSHR T0.Z, T7.X, literal.y,
2113; CM-NEXT:     BFE_INT * T9.W, T0.Y, 0.0, literal.y,
2114; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2115; CM-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
2116; CM-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2117; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test: vector load, per-lane sext from i16 to i32, vector store.
2118  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
2119  %ext = sext <8 x i16> %load to <8 x i32>
2120  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
2121  ret void
2122}
2123
; Zero-extending vector load test: reads a <16 x i16> value through the
; addrspace(1) pointer %in, widens every lane to i32 with zext, and writes the
; resulting <16 x i32> through the addrspace(1) pointer %out.
; The assertions inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of the file), so
; regenerate them with that script instead of editing them by hand.
2124define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
2125; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32:
2126; GCN-NOHSA-SI:       ; %bb.0:
2127; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
2128; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2129; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2130; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
2131; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
2132; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2133; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
2134; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
2135; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
2137; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
2138; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2139; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2140; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
2141; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2142; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
2143; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
2144; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2145; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
2146; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v4
2147; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v1
2148; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v0
2149; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v14, 0xffff, v3
2150; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
2151; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
2152; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
2153; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v5
2154; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v4
2155; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v7
2156; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v6
2157; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2158; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
2159; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2160; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2161; GCN-NOHSA-SI-NEXT:    s_endpgm
2162;
2163; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32:
2164; GCN-HSA:       ; %bb.0:
2165; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2166; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2167; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2168; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2169; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
2170; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
2171; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2172; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2173; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2174; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2175; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2176; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2177; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
2178; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
2179; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2180; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
2181; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2182; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
2183; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
2184; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2185; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
2186; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
2187; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
2188; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
2189; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
2190; GCN-HSA-NEXT:    v_and_b32_e32 v10, 0xffff, v3
2191; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v2
2192; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
2193; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
2194; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v6
2195; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
2196; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
2197; GCN-HSA-NEXT:    v_and_b32_e32 v10, 0xffff, v5
2198; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v4
2199; GCN-HSA-NEXT:    v_and_b32_e32 v12, 0xffff, v6
2200; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
2201; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2202; GCN-HSA-NEXT:    v_and_b32_e32 v3, 0xffff, v1
2203; GCN-HSA-NEXT:    v_and_b32_e32 v1, 0xffff, v0
2204; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
2205; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v15, 16, v7
2206; GCN-HSA-NEXT:    v_and_b32_e32 v14, 0xffff, v7
2207; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
2208; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[1:4]
2209; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
2210; GCN-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[8:11]
2211; GCN-HSA-NEXT:    s_endpgm
2212;
2213; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32:
2214; GCN-NOHSA-VI:       ; %bb.0:
2215; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2216; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2217; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2218; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2219; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2220; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2221; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2222; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2223; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2224; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2225; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2226; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2227; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
2228; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
2229; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2230; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v7
2231; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, 0xffff, v7
2232; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
2233; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v6
2234; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, 0xffff, v1
2235; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2236; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v0
2237; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
2238; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v14, 0xffff, v3
2239; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
2240; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
2241; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2242; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v5
2243; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2244; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v4
2245; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2246; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2247; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2248; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2249; GCN-NOHSA-VI-NEXT:    s_endpgm
2250;
2251; EG-LABEL: global_zextload_v16i16_to_v16i32:
2252; EG:       ; %bb.0:
2253; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2254; EG-NEXT:    TEX 1 @8
2255; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
2256; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
2257; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
2258; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
2259; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
2260; EG-NEXT:    CF_END
2261; EG-NEXT:    Fetch clause starting at 8:
2262; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
2263; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
2264; EG-NEXT:    ALU clause starting at 12:
2265; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2266; EG-NEXT:    ALU clause starting at 13:
2267; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
2268; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2269; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
2270; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2271; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
2272; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
2273; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2274; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
2275; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
2276; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
2277; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2278; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
2279; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
2280; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2281; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
2282; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
2283; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2284; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2285; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
2286; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
2287; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
2288; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
2289; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2290; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2291; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
2292; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2293; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2294; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
2295; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
2296; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
2297; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2298; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2299; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2300; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2301; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
2302; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2303;
2304; CM-LABEL: global_zextload_v16i16_to_v16i32:
2305; CM:       ; %bb.0:
2306; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2307; CM-NEXT:    TEX 1 @8
2308; CM-NEXT:    ALU 33, @13, KC0[CB0:0-32], KC1[]
2309; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
2310; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T17.X
2311; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T16.X
2312; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T14.X
2313; CM-NEXT:    CF_END
2314; CM-NEXT:    Fetch clause starting at 8:
2315; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2316; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2317; CM-NEXT:    ALU clause starting at 12:
2318; CM-NEXT:     MOV * T11.X, KC0[2].Z,
2319; CM-NEXT:    ALU clause starting at 13:
2320; CM-NEXT:     LSHR * T13.W, T12.W, literal.x,
2321; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2322; CM-NEXT:     AND_INT * T13.Z, T12.W, literal.x,
2323; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2324; CM-NEXT:     LSHR T13.Y, T12.Z, literal.x,
2325; CM-NEXT:     LSHR * T12.W, T12.Y, literal.x,
2326; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2327; CM-NEXT:     AND_INT T13.X, T12.Z, literal.x,
2328; CM-NEXT:     AND_INT T12.Z, T12.Y, literal.x,
2329; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2330; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2331; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
2332; CM-NEXT:     LSHR T12.Y, T12.X, literal.y,
2333; CM-NEXT:     LSHR * T15.W, T11.W, literal.y,
2334; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2335; CM-NEXT:     AND_INT T12.X, T12.X, literal.x,
2336; CM-NEXT:     AND_INT T15.Z, T11.W, literal.x,
2337; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2338; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2339; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
2340; CM-NEXT:     LSHR T15.Y, T11.Z, literal.y,
2341; CM-NEXT:     LSHR * T11.W, T11.Y, literal.y,
2342; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2343; CM-NEXT:     AND_INT T15.X, T11.Z, literal.x,
2344; CM-NEXT:     AND_INT T11.Z, T11.Y, literal.x,
2345; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2346; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2347; CM-NEXT:     LSHR T17.X, PV.W, literal.x,
2348; CM-NEXT:     LSHR * T11.Y, T11.X, literal.y,
2349; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2350; CM-NEXT:     AND_INT * T11.X, T11.X, literal.x,
2351; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2352; CM-NEXT:     LSHR * T18.X, KC0[2].Y, literal.x,
2353; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. Each loaded dword packs two i16 lanes; the GCN expectations
; above split it with an AND against 0xffff (low lane) and a 16-bit logical
; right shift (high lane), which is what zext requires.
2354  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
2355  %ext = zext <16 x i16> %load to <16 x i32>
2356  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2357  ret void
2358}
2359
; Sign-extending vector load test: reads a <16 x i16> value through the
; addrspace(1) pointer %in, widens every lane to i32 with sext, and writes the
; resulting <16 x i32> through the addrspace(1) pointer %out.
; The assertions inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of the file), so
; regenerate them with that script instead of editing them by hand.
2360define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
2361; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32:
2362; GCN-NOHSA-SI:       ; %bb.0:
2363; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
2364; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2365; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2366; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
2367; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
2368; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2369; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
2370; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
2371; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2372; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
2373; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
2374; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2375; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2376; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2377; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2378; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v1, 0, 16
2379; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v0, 0, 16
2380; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2381; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2382; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v14, v3, 0, 16
2383; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v2, 0, 16
2384; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2385; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2386; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2387; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v5, 0, 16
2388; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v4, 0, 16
2389; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v7
2390; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v6
2391; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v7, 0, 16
2392; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v6, 0, 16
2393; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2394; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2395; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2396; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2397; GCN-NOHSA-SI-NEXT:    s_endpgm
2398;
2399; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32:
2400; GCN-HSA:       ; %bb.0:
2401; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2402; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2403; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2404; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2405; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
2406; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
2407; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2408; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2409; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2410; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2411; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2412; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2413; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
2414; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
2415; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2416; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
2417; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2418; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
2419; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
2420; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
2421; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2422; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
2423; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s1
2424; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s0
2425; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
2426; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2427; GCN-HSA-NEXT:    v_bfe_i32 v10, v1, 0, 16
2428; GCN-HSA-NEXT:    v_bfe_i32 v8, v0, 0, 16
2429; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2430; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2431; GCN-HSA-NEXT:    v_bfe_i32 v14, v3, 0, 16
2432; GCN-HSA-NEXT:    v_bfe_i32 v12, v2, 0, 16
2433; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2434; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
2435; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
2436; GCN-HSA-NEXT:    s_waitcnt vmcnt(2)
2437; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2438; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v10, 16, v7
2439; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 16, v6
2440; GCN-HSA-NEXT:    v_bfe_i32 v9, v7, 0, 16
2441; GCN-HSA-NEXT:    v_bfe_i32 v7, v6, 0, 16
2442; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2443; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
2444; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
2445; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[7:10]
2446; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
2447; GCN-HSA-NEXT:    s_endpgm
2448;
2449; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32:
2450; GCN-NOHSA-VI:       ; %bb.0:
2451; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2452; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2453; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2454; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2455; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2456; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2457; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2458; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2459; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2460; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2461; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2462; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2463; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
2464; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2465; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2466; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 16, v7
2467; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 16, v6
2468; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v7, 0, 16
2469; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v6, 0, 16
2470; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2471; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v1, 0, 16
2472; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v0, 0, 16
2473; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2474; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2475; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v3, 0, 16
2476; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v2, 0, 16
2477; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2478; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2479; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v5, 0, 16
2480; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v4, 0, 16
2481; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2482; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2483; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2484; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2485; GCN-NOHSA-VI-NEXT:    s_endpgm
2486;
2487; EG-LABEL: global_sextload_v16i16_to_v16i32:
2488; EG:       ; %bb.0:
2489; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2490; EG-NEXT:    TEX 1 @8
2491; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2492; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2493; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2494; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2495; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2496; EG-NEXT:    CF_END
2497; EG-NEXT:    Fetch clause starting at 8:
2498; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2499; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2500; EG-NEXT:    ALU clause starting at 12:
2501; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2502; EG-NEXT:    ALU clause starting at 13:
2503; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2504; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2505; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2506; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2507; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2508; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2509; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2510; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2511; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2512; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2513; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2514; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2515; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2516; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2517; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2518; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2519; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2520; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2521; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2522; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2523; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2524; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2525; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2526; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2527; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2528; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2529; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2530; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2531; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2532; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2533; EG-NEXT:     LSHR T11.X, PS, literal.x,
2534; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2535; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2536; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2537; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2538; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2539; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2540; EG-NEXT:     LSHR T12.X, PS, literal.x,
2541; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2542; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2543;
2544; CM-LABEL: global_sextload_v16i16_to_v16i32:
2545; CM:       ; %bb.0:
2546; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2547; CM-NEXT:    TEX 1 @8
2548; CM-NEXT:    ALU 40, @13, KC0[CB0:0-32], KC1[]
2549; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
2550; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
2551; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
2552; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
2553; CM-NEXT:    CF_END
2554; CM-NEXT:    Fetch clause starting at 8:
2555; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2556; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2557; CM-NEXT:    ALU clause starting at 12:
2558; CM-NEXT:     MOV * T11.X, KC0[2].Z,
2559; CM-NEXT:    ALU clause starting at 13:
2560; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2561; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2562; CM-NEXT:     LSHR T13.X, PV.W, literal.x,
2563; CM-NEXT:     LSHR T0.Y, T11.Y, literal.y,
2564; CM-NEXT:     LSHR T0.Z, T11.Z, literal.y,
2565; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2566; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2567; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
2568; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
2569; CM-NEXT:     LSHR T1.Y, T11.W, literal.y,
2570; CM-NEXT:     BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
2571; CM-NEXT:     LSHR * T0.W, T12.X, literal.y,
2572; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2573; CM-NEXT:     BFE_INT T15.X, T12.Z, 0.0, literal.x,
2574; CM-NEXT:     LSHR T2.Y, T12.Y, literal.x,
2575; CM-NEXT:     BFE_INT T16.Z, T12.Y, 0.0, literal.x,
2576; CM-NEXT:     LSHR * T1.W, T12.W, literal.x,
2577; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2578; CM-NEXT:     BFE_INT T16.X, T12.X, 0.0, literal.x,
2579; CM-NEXT:     LSHR T3.Y, T12.Z, literal.x,
2580; CM-NEXT:     BFE_INT T12.Z, T11.W, 0.0, literal.x,
2581; CM-NEXT:     BFE_INT * T15.W, PV.W, 0.0, literal.x,
2582; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2583; CM-NEXT:     BFE_INT T12.X, T11.Z, 0.0, literal.x,
2584; CM-NEXT:     BFE_INT T15.Y, PV.Y, 0.0, literal.x,
2585; CM-NEXT:     BFE_INT T17.Z, T11.Y, 0.0, literal.x,
2586; CM-NEXT:     BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
2587; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2588; CM-NEXT:     BFE_INT T17.X, T11.X, 0.0, literal.x,
2589; CM-NEXT:     BFE_INT T16.Y, T0.W, 0.0, literal.x,
2590; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.x,
2591; CM-NEXT:     BFE_INT * T12.W, T1.Y, 0.0, literal.x,
2592; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2593; CM-NEXT:     LSHR T18.X, PV.Z, literal.x,
2594; CM-NEXT:     BFE_INT T12.Y, T0.Z, 0.0, literal.y,
2595; CM-NEXT:     LSHR T0.Z, T11.X, literal.y,
2596; CM-NEXT:     BFE_INT * T17.W, T0.Y, 0.0, literal.y,
2597; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2598; CM-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
2599; CM-NEXT:     BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
2600; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test. Each loaded dword packs two i16 lanes; the GCN expectations
; above split it with a signed 16-bit bitfield extract at offset 0 (low lane)
; and a 16-bit arithmetic right shift (high lane), which is what sext requires.
2601  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
2602  %ext = sext <16 x i16> %load to <16 x i32>
2603  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2604  ret void
2605}
2606
2607define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
2608; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32:
2609; GCN-NOHSA-SI:       ; %bb.0:
2610; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
2611; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2612; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2613; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
2614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
2615; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2616; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
2617; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
2618; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2619; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2620; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2621; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2622; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
2623; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
2624; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
2625; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v1
2626; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v0
2627; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
2628; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v27, 16, v7
2629; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v6
2630; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v3
2631; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v2
2632; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, 0xffff, v1
2633; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, 0xffff, v0
2634; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2635; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2636; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, 0xffff, v7
2637; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, 0xffff, v6
2638; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v5
2639; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v4
2640; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2641; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
2642; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
2643; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v31, 16, v9
2644; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v29, 16, v8
2645; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, 0xffff, v11
2646; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v10
2647; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, 0xffff, v9
2648; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v28, 0xffff, v8
2649; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2650; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v15
2651; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
2652; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v35, 16, v13
2653; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v33, 16, v12
2654; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v15
2655; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v14
2656; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, 0xffff, v13
2657; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v32, 0xffff, v12
2658; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
2659; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
2660; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
2661; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
2662; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
2663; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
2664; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2665; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
2666; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
2667; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2668; GCN-NOHSA-SI-NEXT:    s_endpgm
2669;
2670; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32:
2671; GCN-HSA:       ; %bb.0:
2672; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2673; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2674; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
2675; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
2676; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2677; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
2678; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2679; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
2680; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
2681; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
2682; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
2683; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2684; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2685; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2686; GCN-HSA-NEXT:    s_add_u32 s2, s2, 48
2687; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
2688; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
2689; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
2690; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
2691; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
2692; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2693; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2694; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x60
2695; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
2696; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0x70
2697; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
2698; GCN-HSA-NEXT:    s_add_u32 s8, s0, 64
2699; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
2700; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x50
2701; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
2702; GCN-HSA-NEXT:    s_add_u32 s12, s0, 32
2703; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
2704; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
2705; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
2706; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s7
2707; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s6
2708; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
2709; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v1
2710; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
2711; GCN-HSA-NEXT:    v_and_b32_e32 v18, 0xffff, v1
2712; GCN-HSA-NEXT:    v_and_b32_e32 v16, 0xffff, v0
2713; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2714; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
2715; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
2716; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2717; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
2718; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
2719; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v4
2720; GCN-HSA-NEXT:    v_and_b32_e32 v18, 0xffff, v5
2721; GCN-HSA-NEXT:    v_and_b32_e32 v16, 0xffff, v4
2722; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
2723; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
2724; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
2725; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
2726; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v7
2727; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
2728; GCN-HSA-NEXT:    v_and_b32_e32 v18, 0xffff, v7
2729; GCN-HSA-NEXT:    v_and_b32_e32 v16, 0xffff, v6
2730; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
2731; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
2732; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[16:19]
2733; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
2734; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v9
2735; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v8
2736; GCN-HSA-NEXT:    v_and_b32_e32 v6, 0xffff, v9
2737; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v8
2738; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
2739; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
2740; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2741; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
2742; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
2743; GCN-HSA-NEXT:    v_and_b32_e32 v18, 0xffff, v11
2744; GCN-HSA-NEXT:    v_and_b32_e32 v16, 0xffff, v10
2745; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2746; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
2747; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
2748; GCN-HSA-NEXT:    s_waitcnt vmcnt(5)
2749; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v15
2750; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v18, 16, v13
2751; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v16, 16, v12
2752; GCN-HSA-NEXT:    v_and_b32_e32 v9, 0xffff, v15
2753; GCN-HSA-NEXT:    v_and_b32_e32 v17, 0xffff, v13
2754; GCN-HSA-NEXT:    v_and_b32_e32 v15, 0xffff, v12
2755; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s1
2756; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
2757; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
2758; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v3
2759; GCN-HSA-NEXT:    v_and_b32_e32 v3, 0xffff, v2
2760; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v8, 16, v14
2761; GCN-HSA-NEXT:    v_and_b32_e32 v7, 0xffff, v14
2762; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s0
2763; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[15:18]
2764; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[7:10]
2765; GCN-HSA-NEXT:    flat_store_dwordx4 v[11:12], v[3:6]
2766; GCN-HSA-NEXT:    s_endpgm
2767;
2768; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32:
2769; GCN-NOHSA-VI:       ; %bb.0:
2770; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2771; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2772; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2773; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2774; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2775; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2776; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2777; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2778; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2779; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2780; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2781; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2782; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2783; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2784; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
2785; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
2786; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
2787; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v23, 16, v1
2788; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2789; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v35, 16, v13
2790; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v33, 16, v12
2791; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v34, 0xffff, v13
2792; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v32, 0xffff, v12
2793; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v21, 16, v0
2794; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v27, 16, v7
2795; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v25, 16, v6
2796; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, 0xffff, v3
2797; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v2
2798; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v22, 0xffff, v1
2799; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, 0xffff, v0
2800; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2801; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2802; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v26, 0xffff, v7
2803; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, 0xffff, v6
2804; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v5
2805; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v4
2806; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
2807; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
2808; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v31, 16, v9
2809; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v8
2810; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, 0xffff, v11
2811; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v10
2812; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v30, 0xffff, v9
2813; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v28, 0xffff, v8
2814; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v15
2815; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
2816; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, 0xffff, v15
2817; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v14
2818; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
2819; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
2820; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
2821; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
2822; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2823; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
2824; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
2825; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2826; GCN-NOHSA-VI-NEXT:    s_endpgm
2827;
2828; EG-LABEL: global_zextload_v32i16_to_v32i32:
2829; EG:       ; %bb.0:
2830; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2831; EG-NEXT:    TEX 3 @12
2832; EG-NEXT:    ALU 72, @21, KC0[CB0:0-32], KC1[]
2833; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0
2834; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0
2835; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
2836; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0
2837; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0
2838; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2839; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
2840; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1
2841; EG-NEXT:    CF_END
2842; EG-NEXT:    Fetch clause starting at 12:
2843; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2844; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2845; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2846; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2847; EG-NEXT:    ALU clause starting at 20:
2848; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2849; EG-NEXT:    ALU clause starting at 21:
2850; EG-NEXT:     LSHR * T23.W, T20.W, literal.x,
2851; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2852; EG-NEXT:     AND_INT * T23.Z, T20.W, literal.x,
2853; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2854; EG-NEXT:     LSHR T23.Y, T20.Z, literal.x,
2855; EG-NEXT:     LSHR * T20.W, T20.Y, literal.x,
2856; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2857; EG-NEXT:     AND_INT T23.X, T20.Z, literal.x,
2858; EG-NEXT:     AND_INT T20.Z, T20.Y, literal.x,
2859; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2860; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2861; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
2862; EG-NEXT:     LSHR T20.Y, T20.X, literal.y,
2863; EG-NEXT:     LSHR T25.W, T19.W, literal.y,
2864; EG-NEXT:     AND_INT * T20.X, T20.X, literal.z,
2865; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2866; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2867; EG-NEXT:     AND_INT * T25.Z, T19.W, literal.x,
2868; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2869; EG-NEXT:     LSHR T26.X, KC0[2].Y, literal.x,
2870; EG-NEXT:     LSHR T25.Y, T19.Z, literal.y,
2871; EG-NEXT:     LSHR T19.W, T19.Y, literal.y,
2872; EG-NEXT:     AND_INT * T25.X, T19.Z, literal.z,
2873; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2874; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2875; EG-NEXT:     AND_INT T19.Z, T19.Y, literal.x,
2876; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2877; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2878; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
2879; EG-NEXT:     LSHR T19.Y, T19.X, literal.y,
2880; EG-NEXT:     LSHR T28.W, T22.W, literal.y,
2881; EG-NEXT:     AND_INT * T19.X, T19.X, literal.z,
2882; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2883; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2884; EG-NEXT:     AND_INT T28.Z, T22.W, literal.x,
2885; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2886; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2887; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2888; EG-NEXT:     LSHR T28.Y, T22.Z, literal.y,
2889; EG-NEXT:     LSHR T22.W, T22.Y, literal.y,
2890; EG-NEXT:     AND_INT * T28.X, T22.Z, literal.z,
2891; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2892; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2893; EG-NEXT:     AND_INT T22.Z, T22.Y, literal.x,
2894; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2895; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2896; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
2897; EG-NEXT:     LSHR T22.Y, T22.X, literal.y,
2898; EG-NEXT:     LSHR T31.W, T21.W, literal.y,
2899; EG-NEXT:     AND_INT * T22.X, T22.X, literal.z,
2900; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2901; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2902; EG-NEXT:     AND_INT T31.Z, T21.W, literal.x,
2903; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2904; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2905; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2906; EG-NEXT:     LSHR T31.Y, T21.Z, literal.y,
2907; EG-NEXT:     LSHR T21.W, T21.Y, literal.y,
2908; EG-NEXT:     AND_INT * T31.X, T21.Z, literal.z,
2909; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2910; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2911; EG-NEXT:     AND_INT T21.Z, T21.Y, literal.x,
2912; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2913; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
2914; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
2915; EG-NEXT:     LSHR T21.Y, T21.X, literal.y,
2916; EG-NEXT:     AND_INT * T21.X, T21.X, literal.z,
2917; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2918; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2919; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2920; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
2921; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2922; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2923;
2924; CM-LABEL: global_zextload_v32i16_to_v32i32:
2925; CM:       ; %bb.0:
2926; CM-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2927; CM-NEXT:    TEX 3 @12
2928; CM-NEXT:    ALU 65, @21, KC0[CB0:0-32], KC1[]
2929; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
2930; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T21.X
2931; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T32.X
2932; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T22.X
2933; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T27, T29.X
2934; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T19.X
2935; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T26.X
2936; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T20.X
2937; CM-NEXT:    CF_END
2938; CM-NEXT:    Fetch clause starting at 12:
2939; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
2940; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 0, #1
2941; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
2942; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 32, #1
2943; CM-NEXT:    ALU clause starting at 20:
2944; CM-NEXT:     MOV * T19.X, KC0[2].Z,
2945; CM-NEXT:    ALU clause starting at 21:
2946; CM-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2947; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2948; CM-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2949; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2950; CM-NEXT:     LSHR T23.Y, T20.X, literal.x,
2951; CM-NEXT:     LSHR * T24.W, T20.W, literal.x,
2952; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2953; CM-NEXT:     AND_INT T23.X, T20.X, literal.x,
2954; CM-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2955; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2956; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2957; CM-NEXT:     LSHR T20.X, PV.W, literal.x,
2958; CM-NEXT:     LSHR T24.Y, T20.Z, literal.y,
2959; CM-NEXT:     LSHR * T25.W, T19.Y, literal.y,
2960; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2961; CM-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2962; CM-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2963; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2964; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
2965; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
2966; CM-NEXT:     LSHR T25.Y, T19.X, literal.y,
2967; CM-NEXT:     LSHR * T27.W, T19.W, literal.y,
2968; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2969; CM-NEXT:     AND_INT T25.X, T19.X, literal.x,
2970; CM-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2971; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2972; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2973; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
2974; CM-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2975; CM-NEXT:     LSHR * T28.W, T22.Y, literal.y,
2976; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2977; CM-NEXT:     AND_INT T27.X, T19.Z, literal.x,
2978; CM-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2979; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2980; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2981; CM-NEXT:     LSHR T29.X, PV.W, literal.x,
2982; CM-NEXT:     LSHR T28.Y, T22.X, literal.y,
2983; CM-NEXT:     LSHR * T30.W, T22.W, literal.y,
2984; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2985; CM-NEXT:     AND_INT T28.X, T22.X, literal.x,
2986; CM-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2987; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2988; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2989; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
2990; CM-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2991; CM-NEXT:     LSHR * T31.W, T21.Y, literal.y,
2992; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2993; CM-NEXT:     AND_INT T30.X, T22.Z, literal.x,
2994; CM-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
2995; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2996; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2997; CM-NEXT:     LSHR T32.X, PV.W, literal.x,
2998; CM-NEXT:     LSHR T31.Y, T21.X, literal.y,
2999; CM-NEXT:     LSHR * T33.W, T21.W, literal.y,
3000; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3001; CM-NEXT:     AND_INT T31.X, T21.X, literal.x,
3002; CM-NEXT:     AND_INT * T33.Z, T21.W, literal.x,
3003; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3004; CM-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
3005; CM-NEXT:     LSHR * T33.Y, T21.Z, literal.y,
3006; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3007; CM-NEXT:     AND_INT T33.X, T21.Z, literal.x,
3008; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3009; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3010; CM-NEXT:     LSHR * T34.X, PV.W, literal.x,
3011; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3012  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
3013  %ext = zext <32 x i16> %load to <32 x i32>
3014  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
3015  ret void
3016}
3017
3018define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
3019; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32:
3020; GCN-NOHSA-SI:       ; %bb.0:
3021; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
3022; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3023; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3024; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
3025; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
3026; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3027; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
3028; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
3029; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3030; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3031; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3032; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3033; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
3034; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3035; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3036; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v3, 0, 16
3037; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v2, 0, 16
3038; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 16, v1
3039; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 16, v0
3040; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v1, 0, 16
3041; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v0, 0, 16
3042; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
3043; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3044; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3045; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v7, 0, 16
3046; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v6, 0, 16
3047; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v27, 16, v5
3048; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v25, 16, v4
3049; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v26, v5, 0, 16
3050; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v24, v4, 0, 16
3051; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3052; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v11
3053; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v10
3054; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v11, 0, 16
3055; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v10, 0, 16
3056; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v31, 16, v9
3057; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v29, 16, v8
3058; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v30, v9, 0, 16
3059; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v28, v8, 0, 16
3060; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3061; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v15
3062; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v14
3063; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v15, 0, 16
3064; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v14, 0, 16
3065; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v35, 16, v13
3066; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v33, 16, v12
3067; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v34, v13, 0, 16
3068; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v32, v12, 0, 16
3069; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
3070; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
3071; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3072; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3073; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3074; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3075; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3076; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3077; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3078; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3079; GCN-NOHSA-SI-NEXT:    s_endpgm
3080;
3081; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32:
3082; GCN-HSA:       ; %bb.0:
3083; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3084; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3085; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3086; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3087; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3088; GCN-HSA-NEXT:    s_add_u32 s4, s2, 48
3089; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3090; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3091; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3092; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
3093; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
3094; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3095; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
3096; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
3097; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
3098; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
3099; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
3100; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
3101; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
3102; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
3103; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3104; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3105; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
3106; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 16, v1
3107; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 16, v0
3108; GCN-HSA-NEXT:    v_bfe_i32 v18, v1, 0, 16
3109; GCN-HSA-NEXT:    v_bfe_i32 v16, v0, 0, 16
3110; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
3111; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
3112; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
3113; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3114; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3115; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3116; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3117; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
3118; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
3119; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3120; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3121; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3122; GCN-HSA-NEXT:    v_bfe_i32 v18, v3, 0, 16
3123; GCN-HSA-NEXT:    v_bfe_i32 v16, v2, 0, 16
3124; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3125; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
3126; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
3127; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
3128; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3129; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3130; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3131; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3132; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3133; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3134; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3135; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
3136; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
3137; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
3138; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3139; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
3140; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
3141; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
3142; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3143; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3144; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
3145; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3146; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3147; GCN-HSA-NEXT:    v_bfe_i32 v2, v7, 0, 16
3148; GCN-HSA-NEXT:    v_bfe_i32 v0, v6, 0, 16
3149; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
3150; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v9
3151; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v8
3152; GCN-HSA-NEXT:    v_bfe_i32 v6, v9, 0, 16
3153; GCN-HSA-NEXT:    v_bfe_i32 v4, v8, 0, 16
3154; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3155; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
3156; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
3157; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3158; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v11
3159; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v10
3160; GCN-HSA-NEXT:    v_bfe_i32 v2, v11, 0, 16
3161; GCN-HSA-NEXT:    v_bfe_i32 v0, v10, 0, 16
3162; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
3163; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[4:7]
3164; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
3165; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
3166; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v13
3167; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v12
3168; GCN-HSA-NEXT:    v_bfe_i32 v6, v13, 0, 16
3169; GCN-HSA-NEXT:    v_bfe_i32 v4, v12, 0, 16
3170; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
3171; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v15
3172; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v14
3173; GCN-HSA-NEXT:    v_bfe_i32 v2, v15, 0, 16
3174; GCN-HSA-NEXT:    v_bfe_i32 v0, v14, 0, 16
3175; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3176; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
3177; GCN-HSA-NEXT:    s_endpgm
3178;
3179; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32:
3180; GCN-NOHSA-VI:       ; %bb.0:
3181; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
3182; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3183; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3184; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
3185; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
3186; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3187; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
3188; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
3189; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3190; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3191; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3192; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3193; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
3194; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
3195; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
3196; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3197; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3198; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v3, 0, 16
3199; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3200; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v35, 16, v13
3201; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v33, 16, v12
3202; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v34, v13, 0, 16
3203; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v32, v12, 0, 16
3204; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v2, 0, 16
3205; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v23, 16, v1
3206; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v21, 16, v0
3207; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v22, v1, 0, 16
3208; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v20, v0, 0, 16
3209; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3210; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3211; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v7, 0, 16
3212; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v6, 0, 16
3213; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v27, 16, v5
3214; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v25, 16, v4
3215; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v26, v5, 0, 16
3216; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v24, v4, 0, 16
3217; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v11
3218; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v10
3219; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v11, 0, 16
3220; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v10, 0, 16
3221; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v31, 16, v9
3222; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v29, 16, v8
3223; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v30, v9, 0, 16
3224; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v28, v8, 0, 16
3225; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v15
3226; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v14
3227; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v15, 0, 16
3228; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v14, 0, 16
3229; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3230; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3231; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3232; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3233; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3234; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3235; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3236; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3237; GCN-NOHSA-VI-NEXT:    s_endpgm
3238;
3239; EG-LABEL: global_sextload_v32i16_to_v32i32:
3240; EG:       ; %bb.0:
3241; EG-NEXT:    ALU 9, @20, KC0[CB0:0-32], KC1[]
3242; EG-NEXT:    TEX 3 @12
3243; EG-NEXT:    ALU 73, @30, KC0[CB0:0-32], KC1[]
3244; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0
3245; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
3246; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0
3247; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0
3248; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0
3249; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
3250; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
3251; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
3252; EG-NEXT:    CF_END
3253; EG-NEXT:    Fetch clause starting at 12:
3254; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
3255; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
3256; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
3257; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
3258; EG-NEXT:    ALU clause starting at 20:
3259; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3260; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3261; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
3262; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.x,
3263; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3264; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3265; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3266; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
3267; EG-NEXT:     MOV * T22.X, KC0[2].Z,
3268; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3269; EG-NEXT:    ALU clause starting at 30:
3270; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3271; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3272; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
3273; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3274; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
3275; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
3276; EG-NEXT:     LSHR T0.W, T22.Y, literal.y,
3277; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3278; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3279; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3280; EG-NEXT:     LSHR T28.X, PS, literal.x,
3281; EG-NEXT:     LSHR T0.Y, T22.W, literal.y,
3282; EG-NEXT:     BFE_INT T29.Z, T25.W, 0.0, literal.y, BS:VEC_120/SCL_212
3283; EG-NEXT:     LSHR T1.W, T24.Y, literal.y,
3284; EG-NEXT:     LSHR * T2.W, T24.W, literal.y,
3285; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3286; EG-NEXT:     BFE_INT T29.X, T25.Z, 0.0, literal.x,
3287; EG-NEXT:     LSHR T1.Y, T23.Y, literal.x,
3288; EG-NEXT:     BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3289; EG-NEXT:     LSHR T3.W, T23.W, literal.x,
3290; EG-NEXT:     LSHR * T4.W, T25.W, literal.x,
3291; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3292; EG-NEXT:     BFE_INT T30.X, T25.X, 0.0, literal.x,
3293; EG-NEXT:     LSHR T2.Y, T25.Y, literal.x,
3294; EG-NEXT:     BFE_INT T31.Z, T23.W, 0.0, literal.x,
3295; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
3296; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
3297; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3298; EG-NEXT:     BFE_INT T31.X, T23.Z, 0.0, literal.x,
3299; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
3300; EG-NEXT:     BFE_INT T25.Z, T23.Y, 0.0, literal.x,
3301; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
3302; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
3303; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3304; EG-NEXT:     BFE_INT T25.X, T23.X, 0.0, literal.x,
3305; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
3306; EG-NEXT:     BFE_INT T32.Z, T24.W, 0.0, literal.x,
3307; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
3308; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
3309; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3310; EG-NEXT:     BFE_INT T32.X, T24.Z, 0.0, literal.x,
3311; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
3312; EG-NEXT:     BFE_INT T23.Z, T24.Y, 0.0, literal.x,
3313; EG-NEXT:     BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3314; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
3315; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3316; EG-NEXT:     BFE_INT T23.X, T24.X, 0.0, literal.x,
3317; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
3318; EG-NEXT:     BFE_INT T33.Z, T22.W, 0.0, literal.x,
3319; EG-NEXT:     BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
3320; EG-NEXT:     LSHR * T2.W, T24.Z, literal.x,
3321; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3322; EG-NEXT:     BFE_INT T33.X, T22.Z, 0.0, literal.x,
3323; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
3324; EG-NEXT:     BFE_INT T24.Z, T22.Y, 0.0, literal.x,
3325; EG-NEXT:     BFE_INT T23.W, T1.W, 0.0, literal.x,
3326; EG-NEXT:     LSHR * T1.W, T24.X, literal.x,
3327; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3328; EG-NEXT:     BFE_INT T24.X, T22.X, 0.0, literal.x,
3329; EG-NEXT:     BFE_INT T23.Y, PS, 0.0, literal.x,
3330; EG-NEXT:     LSHR T0.Z, T22.Z, literal.x,
3331; EG-NEXT:     BFE_INT T33.W, T0.Y, 0.0, literal.x,
3332; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3333; EG-NEXT:    16(2.242078e-44), 112(1.569454e-43)
3334; EG-NEXT:     LSHR T34.X, PS, literal.x,
3335; EG-NEXT:     BFE_INT T33.Y, PV.Z, 0.0, literal.y,
3336; EG-NEXT:     LSHR T0.Z, T22.X, literal.y,
3337; EG-NEXT:     BFE_INT T24.W, T0.W, 0.0, literal.y,
3338; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3339; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3340; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3341; EG-NEXT:     LSHR T22.X, PS, literal.x,
3342; EG-NEXT:     BFE_INT * T24.Y, PV.Z, 0.0, literal.y,
3343; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3344;
3345; CM-LABEL: global_sextload_v32i16_to_v32i32:
3346; CM:       ; %bb.0:
3347; CM-NEXT:    ALU 0, @22, KC0[CB0:0-32], KC1[]
3348; CM-NEXT:    TEX 0 @14
3349; CM-NEXT:    ALU 7, @23, KC0[CB0:0-32], KC1[]
3350; CM-NEXT:    TEX 2 @16
3351; CM-NEXT:    ALU 76, @31, KC0[CB0:0-32], KC1[]
3352; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
3353; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T20.X
3354; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T32, T28.X
3355; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T27.X
3356; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T26.X
3357; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T22, T25.X
3358; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T24.X
3359; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T29, T21.X
3360; CM-NEXT:    CF_END
3361; CM-NEXT:    Fetch clause starting at 14:
3362; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
3363; CM-NEXT:    Fetch clause starting at 16:
3364; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 48, #1
3365; CM-NEXT:     VTX_READ_128 T23.XYZW, T19.X, 32, #1
3366; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
3367; CM-NEXT:    ALU clause starting at 22:
3368; CM-NEXT:     MOV * T19.X, KC0[2].Z,
3369; CM-NEXT:    ALU clause starting at 23:
3370; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3371; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3372; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
3373; CM-NEXT:     LSHR T0.Y, T20.Z, literal.y,
3374; CM-NEXT:     LSHR T0.Z, T20.W, literal.y,
3375; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3376; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3377; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
3378; CM-NEXT:    ALU clause starting at 31:
3379; CM-NEXT:     LSHR T24.X, T0.W, literal.x,
3380; CM-NEXT:     LSHR T1.Y, T20.Y, literal.y,
3381; CM-NEXT:     LSHR T1.Z, T19.Z, literal.y,
3382; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3383; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3384; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3385; CM-NEXT:     LSHR T25.X, PV.W, literal.x,
3386; CM-NEXT:     LSHR T2.Y, T19.W, literal.y,
3387; CM-NEXT:     LSHR T2.Z, T19.X, literal.y,
3388; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3389; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3390; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
3391; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
3392; CM-NEXT:     LSHR T3.Y, T19.Y, literal.y,
3393; CM-NEXT:     LSHR T3.Z, T23.Z, literal.y,
3394; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3395; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3396; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3397; CM-NEXT:     LSHR T27.X, PV.W, literal.x,
3398; CM-NEXT:     LSHR T4.Y, T23.W, literal.y,
3399; CM-NEXT:     LSHR T4.Z, T23.X, literal.y,
3400; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3401; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3402; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3403; CM-NEXT:     LSHR T28.X, PV.W, literal.x,
3404; CM-NEXT:     LSHR T5.Y, T23.Y, literal.y,
3405; CM-NEXT:     BFE_INT T29.Z, T22.Y, 0.0, literal.y, BS:VEC_120/SCL_212
3406; CM-NEXT:     LSHR * T0.W, T22.Z, literal.y,
3407; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3408; CM-NEXT:     BFE_INT T29.X, T22.X, 0.0, literal.x,
3409; CM-NEXT:     LSHR T6.Y, T22.W, literal.x,
3410; CM-NEXT:     BFE_INT T30.Z, T22.W, 0.0, literal.x,
3411; CM-NEXT:     LSHR * T1.W, T22.Y, literal.x,
3412; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3413; CM-NEXT:     BFE_INT T30.X, T22.Z, 0.0, literal.x,
3414; CM-NEXT:     LSHR T7.Y, T22.X, literal.x,
3415; CM-NEXT:     BFE_INT T22.Z, T23.Y, 0.0, literal.x,
3416; CM-NEXT:     BFE_INT * T29.W, PV.W, 0.0, literal.x,
3417; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3418; CM-NEXT:     BFE_INT T22.X, T23.X, 0.0, literal.x,
3419; CM-NEXT:     BFE_INT T29.Y, PV.Y, 0.0, literal.x,
3420; CM-NEXT:     BFE_INT T31.Z, T23.W, 0.0, literal.x,
3421; CM-NEXT:     BFE_INT * T30.W, T6.Y, 0.0, literal.x,
3422; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3423; CM-NEXT:     BFE_INT T31.X, T23.Z, 0.0, literal.x,
3424; CM-NEXT:     BFE_INT T30.Y, T0.W, 0.0, literal.x,
3425; CM-NEXT:     BFE_INT T23.Z, T19.Y, 0.0, literal.x,
3426; CM-NEXT:     BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3427; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3428; CM-NEXT:     BFE_INT T23.X, T19.X, 0.0, literal.x,
3429; CM-NEXT:     BFE_INT T22.Y, T4.Z, 0.0, literal.x,
3430; CM-NEXT:     BFE_INT T32.Z, T19.W, 0.0, literal.x,
3431; CM-NEXT:     BFE_INT * T31.W, T4.Y, 0.0, literal.x,
3432; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3433; CM-NEXT:     BFE_INT T32.X, T19.Z, 0.0, literal.x,
3434; CM-NEXT:     BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3435; CM-NEXT:     BFE_INT T19.Z, T20.Y, 0.0, literal.x,
3436; CM-NEXT:     BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3437; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3438; CM-NEXT:     BFE_INT T19.X, T20.X, 0.0, literal.x,
3439; CM-NEXT:     BFE_INT T23.Y, T2.Z, 0.0, literal.x,
3440; CM-NEXT:     BFE_INT T33.Z, T20.W, 0.0, literal.x,
3441; CM-NEXT:     BFE_INT * T32.W, T2.Y, 0.0, literal.x,
3442; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3443; CM-NEXT:     BFE_INT T33.X, T20.Z, 0.0, literal.x,
3444; CM-NEXT:     BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3445; CM-NEXT:     LSHR T1.Z, T20.X, literal.x,
3446; CM-NEXT:     BFE_INT * T19.W, T1.Y, 0.0, literal.x,
3447; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3448; CM-NEXT:     LSHR T20.X, KC0[2].Y, literal.x,
3449; CM-NEXT:     BFE_INT T19.Y, PV.Z, 0.0, literal.y,
3450; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.y,
3451; CM-NEXT:     BFE_INT * T33.W, T0.Z, 0.0, literal.y,
3452; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3453; CM-NEXT:     LSHR T34.X, PV.Z, literal.x,
3454; CM-NEXT:     BFE_INT * T33.Y, T0.Y, 0.0, literal.y,
3455; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3456  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
3457  %ext = sext <32 x i16> %load to <32 x i32>
3458  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
3459  ret void
3460}
3461
3462define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
3463; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32:
3464; GCN-NOHSA-SI:       ; %bb.0:
3465; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
3466; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
3467; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
3468; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
3469; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
3470; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
3471; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
3472; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3473; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3474; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
3475; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
3476; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3477; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
3478; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
3479; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
3480; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16
3481; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3482; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v15
3483; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
3484; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v13
3485; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v12
3486; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3487; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v19
3488; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v18
3489; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v15
3490; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v14
3491; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
3492; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3493; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
3494; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
3495; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
3496; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v5, 0xffff, v13
3497; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3498; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v3, 0xffff, v12
3499; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
3500; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3501; GCN-NOHSA-SI-NEXT:    buffer_store_dword v4, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
3502; GCN-NOHSA-SI-NEXT:    buffer_store_dword v5, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
3503; GCN-NOHSA-SI-NEXT:    buffer_store_dword v6, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
3504; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v17
3505; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v16
3506; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v19
3507; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3508; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, 0xffff, v18
3509; GCN-NOHSA-SI-NEXT:    buffer_store_dword v6, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill
3510; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3511; GCN-NOHSA-SI-NEXT:    buffer_store_dword v7, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill
3512; GCN-NOHSA-SI-NEXT:    buffer_store_dword v8, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill
3513; GCN-NOHSA-SI-NEXT:    buffer_store_dword v9, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill
3514; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32
3515; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v14, 0xffff, v17
3516; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v16
3517; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48
3518; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3519; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v27
3520; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v26
3521; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v25
3522; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v24
3523; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v27
3524; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v26
3525; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, 0xffff, v25
3526; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, 0xffff, v24
3527; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3528; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v27, 16, v31
3529; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v30
3530; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v34, 16, v29
3531; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v28
3532; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, 0xffff, v31
3533; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, 0xffff, v30
3534; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:64
3535; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v33, 0xffff, v29
3536; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v31, 0xffff, v28
3537; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:80
3538; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3539; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v46, 16, v38
3540; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v37
3541; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v50, 16, v36
3542; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v35
3543; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v45, 0xffff, v38
3544; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v43, 0xffff, v37
3545; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v49, 0xffff, v36
3546; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v47, 0xffff, v35
3547; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3548; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v38, 16, v42
3549; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v41
3550; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v54, 16, v40
3551; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v39
3552; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v37, 0xffff, v42
3553; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v35, 0xffff, v41
3554; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[55:58], off, s[8:11], 0 offset:96
3555; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v53, 0xffff, v40
3556; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v51, 0xffff, v39
3557; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112
3558; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3559; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v61, 16, v58
3560; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v59, 16, v57
3561; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v56
3562; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3563; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v55
3564; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v60, 0xffff, v58
3565; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v58, 0xffff, v57
3566; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v56
3567; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v55
3568; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3569; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v42
3570; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v41
3571; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v40
3572; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v39
3573; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, 0xffff, v42
3574; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v41
3575; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v40
3576; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v39
3577; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
3578; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
3579; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3580; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240
3581; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192
3582; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:208
3583; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:160
3584; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:176
3585; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:128
3586; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:144
3587; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:96
3588; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112
3589; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
3590; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
3591; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
3592; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload
3593; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload
3594; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload
3595; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:48 ; 4-byte Folded Reload
3596; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3597; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3598; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3599; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
3600; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
3601; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
3602; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload
3603; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3604; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3605; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3606; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
3607; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
3608; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
3609; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
3610; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3611; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3612; GCN-NOHSA-SI-NEXT:    s_endpgm
3613;
3614; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32:
3615; GCN-HSA:       ; %bb.0:
3616; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3617; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3618; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3619; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3620; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3621; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x50
3622; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3623; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3624; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3625; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
3626; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x60
3627; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3628; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
3629; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
3630; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x70
3631; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3632; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s5
3633; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s4
3634; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
3635; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3636; GCN-HSA-NEXT:    s_add_u32 s6, s2, 32
3637; GCN-HSA-NEXT:    s_addc_u32 s7, s3, 0
3638; GCN-HSA-NEXT:    s_add_u32 s8, s2, 48
3639; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
3640; GCN-HSA-NEXT:    s_addc_u32 s9, s3, 0
3641; GCN-HSA-NEXT:    s_add_u32 s2, s2, 64
3642; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
3643; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
3644; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3645; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3646; GCN-HSA-NEXT:    flat_load_dwordx4 v[16:19], v[16:17]
3647; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s9
3648; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s8
3649; GCN-HSA-NEXT:    flat_load_dwordx4 v[20:23], v[20:21]
3650; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s5
3651; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s4
3652; GCN-HSA-NEXT:    flat_load_dwordx4 v[28:31], v[28:29]
3653; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3654; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3655; GCN-HSA-NEXT:    v_mov_b32_e32 v37, s1
3656; GCN-HSA-NEXT:    v_mov_b32_e32 v36, s0
3657; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
3658; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v27, 16, v1
3659; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v25, 16, v0
3660; GCN-HSA-NEXT:    v_and_b32_e32 v26, 0xffff, v1
3661; GCN-HSA-NEXT:    v_and_b32_e32 v24, 0xffff, v0
3662; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
3663; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s7
3664; GCN-HSA-NEXT:    flat_load_dwordx4 v[32:35], v[0:1]
3665; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3666; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3667; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3668; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3669; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xf0
3670; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
3671; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xc0
3672; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
3673; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0xd0
3674; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
3675; GCN-HSA-NEXT:    flat_store_dwordx4 v[36:37], v[24:27]
3676; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0xa0
3677; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v27, 16, v3
3678; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v25, 16, v2
3679; GCN-HSA-NEXT:    v_and_b32_e32 v26, 0xffff, v3
3680; GCN-HSA-NEXT:    v_and_b32_e32 v24, 0xffff, v2
3681; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[24:27]
3682; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
3683; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
3684; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
3685; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
3686; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
3687; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v5
3688; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v4
3689; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
3690; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
3691; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
3692; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
3693; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3694; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3695; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3696; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
3697; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
3698; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v7
3699; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v6
3700; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3701; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3702; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3703; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3704; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3705; GCN-HSA-NEXT:    s_waitcnt vmcnt(9)
3706; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
3707; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
3708; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v9
3709; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v8
3710; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3711; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3712; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
3713; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v11
3714; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v10
3715; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v11
3716; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v10
3717; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
3718; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
3719; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
3720; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3721; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
3722; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3723; GCN-HSA-NEXT:    s_waitcnt vmcnt(10)
3724; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v14
3725; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v13
3726; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v12
3727; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v13
3728; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v12
3729; GCN-HSA-NEXT:    v_and_b32_e32 v6, 0xffff, v15
3730; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3731; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
3732; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v15
3733; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v14
3734; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3735; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3736; GCN-HSA-NEXT:    s_waitcnt vmcnt(11)
3737; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v17
3738; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s3
3739; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v16
3740; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v17
3741; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v16
3742; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
3743; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3744; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v19
3745; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
3746; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3747; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v18
3748; GCN-HSA-NEXT:    v_and_b32_e32 v3, 0xffff, v19
3749; GCN-HSA-NEXT:    v_and_b32_e32 v1, 0xffff, v18
3750; GCN-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[1:4]
3751; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3752; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3753; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3754; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3755; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3756; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3757; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3758; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3759; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3760; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3761; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3762; GCN-HSA-NEXT:    s_waitcnt vmcnt(10)
3763; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v15, 16, v33
3764; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v32
3765; GCN-HSA-NEXT:    v_and_b32_e32 v14, 0xffff, v33
3766; GCN-HSA-NEXT:    v_and_b32_e32 v12, 0xffff, v32
3767; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3768; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v21
3769; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v20
3770; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v21
3771; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v20
3772; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
3773; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3774; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
3775; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v23
3776; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v22
3777; GCN-HSA-NEXT:    v_and_b32_e32 v6, 0xffff, v23
3778; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v22
3779; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
3780; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3781; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3782; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v35
3783; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v34
3784; GCN-HSA-NEXT:    v_and_b32_e32 v10, 0xffff, v35
3785; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v34
3786; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3787; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
3788; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
3789; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3790; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v29
3791; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v28
3792; GCN-HSA-NEXT:    v_and_b32_e32 v6, 0xffff, v29
3793; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v28
3794; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3795; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
3796; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3797; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
3798; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3799; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v30
3800; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v31
3801; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v30
3802; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3803; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3804; GCN-HSA-NEXT:    s_endpgm
3805;
3806; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32:
3807; GCN-NOHSA-VI:       ; %bb.0:
3808; GCN-NOHSA-VI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
3809; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
3810; GCN-NOHSA-VI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
3811; GCN-NOHSA-VI-NEXT:    s_mov_b32 s90, -1
3812; GCN-NOHSA-VI-NEXT:    s_mov_b32 s91, 0xe80000
3813; GCN-NOHSA-VI-NEXT:    s_add_u32 s88, s88, s3
3814; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3815; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3816; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
3817; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3818; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
3819; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
3820; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
3821; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
3822; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16
3823; GCN-NOHSA-VI-NEXT:    s_addc_u32 s89, s89, 0
3824; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
3825; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
3826; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
3827; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v15
3828; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
3829; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v15
3830; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v14
3831; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
3832; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3833; GCN-NOHSA-VI-NEXT:    buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
3834; GCN-NOHSA-VI-NEXT:    buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
3835; GCN-NOHSA-VI-NEXT:    buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
3836; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v13
3837; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v12
3838; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v5, 0xffff, v13
3839; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v3, 0xffff, v12
3840; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v19
3841; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v18
3842; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v17
3843; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v16
3844; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, 0xffff, v19
3845; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v18
3846; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v14, 0xffff, v17
3847; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, 0xffff, v16
3848; GCN-NOHSA-VI-NEXT:    buffer_store_dword v3, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill
3849; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3850; GCN-NOHSA-VI-NEXT:    buffer_store_dword v4, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill
3851; GCN-NOHSA-VI-NEXT:    buffer_store_dword v5, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill
3852; GCN-NOHSA-VI-NEXT:    buffer_store_dword v6, off, s[88:91], 0 offset:32 ; 4-byte Folded Spill
3853; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32
3854; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48
3855; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:64
3856; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:80
3857; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
3858; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v27
3859; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
3860; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v46, 16, v38
3861; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v44, 16, v37
3862; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v50, 16, v36
3863; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v48, 16, v35
3864; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v45, 0xffff, v38
3865; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v43, 0xffff, v37
3866; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v49, 0xffff, v36
3867; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v47, 0xffff, v35
3868; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3869; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v38, 16, v42
3870; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v36, 16, v41
3871; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v54, 16, v40
3872; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v52, 16, v39
3873; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v37, 0xffff, v42
3874; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v35, 0xffff, v41
3875; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v53, 0xffff, v40
3876; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v51, 0xffff, v39
3877; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:96
3878; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[55:58], off, s[8:11], 0 offset:112
3879; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v26
3880; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v23, 16, v25
3881; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v21, 16, v24
3882; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, 0xffff, v27
3883; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v26
3884; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v22, 0xffff, v25
3885; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, 0xffff, v24
3886; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v27, 16, v31
3887; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v25, 16, v30
3888; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v34, 16, v29
3889; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v32, 16, v28
3890; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v26, 0xffff, v31
3891; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, 0xffff, v30
3892; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v33, 0xffff, v29
3893; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v31, 0xffff, v28
3894; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
3895; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v62, 16, v42
3896; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3897; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v56
3898; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v55
3899; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v56
3900; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v55
3901; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v60, 16, v41
3902; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v40
3903; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v39
3904; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v61, 0xffff, v42
3905; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v59, 0xffff, v41
3906; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, 0xffff, v40
3907; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v39
3908; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v42, 16, v58
3909; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v40, 16, v57
3910; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v41, 0xffff, v58
3911; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v39, 0xffff, v57
3912; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3913; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:240
3914; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
3915; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:208
3916; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:160
3917; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:176
3918; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:128
3919; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:144
3920; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:96
3921; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112
3922; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
3923; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
3924; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
3925; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
3926; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload
3927; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload
3928; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload
3929; GCN-NOHSA-VI-NEXT:    buffer_load_dword v3, off, s[88:91], 0 offset:32 ; 4-byte Folded Reload
3930; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3931; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3932; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
3933; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
3934; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
3935; GCN-NOHSA-VI-NEXT:    buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
3936; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3937; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3938; GCN-NOHSA-VI-NEXT:    s_endpgm
3939;
3940; EG-LABEL: global_zextload_v64i16_to_v64i32:
3941; EG:       ; %bb.0:
3942; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
3943; EG-NEXT:    TEX 3 @22
3944; EG-NEXT:    ALU 56, @39, KC0[CB0:0-32], KC1[]
3945; EG-NEXT:    TEX 3 @30
3946; EG-NEXT:    ALU 87, @96, KC0[CB0:0-32], KC1[]
3947; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0
3948; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0
3949; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0
3950; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0
3951; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0
3952; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0
3953; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0
3954; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0
3955; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T55.X, 0
3956; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T53.X, 0
3957; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T48.X, 0
3958; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0
3959; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T46.X, 0
3960; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0
3961; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0
3962; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 1
3963; EG-NEXT:    CF_END
3964; EG-NEXT:    Fetch clause starting at 22:
3965; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
3966; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 48, #1
3967; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 32, #1
3968; EG-NEXT:     VTX_READ_128 T40.XYZW, T35.X, 16, #1
3969; EG-NEXT:    Fetch clause starting at 30:
3970; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
3971; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
3972; EG-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 80, #1
3973; EG-NEXT:     VTX_READ_128 T52.XYZW, T35.X, 64, #1
3974; EG-NEXT:    ALU clause starting at 38:
3975; EG-NEXT:     MOV * T35.X, KC0[2].Z,
3976; EG-NEXT:    ALU clause starting at 39:
3977; EG-NEXT:     LSHR * T37.W, T36.W, literal.x,
3978; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3979; EG-NEXT:     AND_INT * T37.Z, T36.W, literal.x,
3980; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3981; EG-NEXT:     LSHR T37.Y, T36.Z, literal.x,
3982; EG-NEXT:     LSHR * T36.W, T36.Y, literal.x,
3983; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3984; EG-NEXT:     AND_INT T37.X, T36.Z, literal.x,
3985; EG-NEXT:     AND_INT T36.Z, T36.Y, literal.x,
3986; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3987; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3988; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
3989; EG-NEXT:     LSHR T36.Y, T36.X, literal.y,
3990; EG-NEXT:     LSHR T42.W, T40.W, literal.y,
3991; EG-NEXT:     AND_INT * T36.X, T36.X, literal.z,
3992; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3993; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3994; EG-NEXT:     AND_INT * T42.Z, T40.W, literal.x,
3995; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3996; EG-NEXT:     LSHR T43.X, KC0[2].Y, literal.x,
3997; EG-NEXT:     LSHR T42.Y, T40.Z, literal.y,
3998; EG-NEXT:     LSHR T40.W, T40.Y, literal.y,
3999; EG-NEXT:     AND_INT * T42.X, T40.Z, literal.z,
4000; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4001; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4002; EG-NEXT:     AND_INT T40.Z, T40.Y, literal.x,
4003; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4004; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
4005; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
4006; EG-NEXT:     LSHR T40.Y, T40.X, literal.y,
4007; EG-NEXT:     LSHR T45.W, T39.W, literal.y,
4008; EG-NEXT:     AND_INT * T40.X, T40.X, literal.z,
4009; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4010; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4011; EG-NEXT:     AND_INT T45.Z, T39.W, literal.x,
4012; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4013; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
4014; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
4015; EG-NEXT:     LSHR T45.Y, T39.Z, literal.y,
4016; EG-NEXT:     LSHR T39.W, T39.Y, literal.y,
4017; EG-NEXT:     AND_INT * T45.X, T39.Z, literal.z,
4018; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4019; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4020; EG-NEXT:     AND_INT T39.Z, T39.Y, literal.x,
4021; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4022; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
4023; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
4024; EG-NEXT:     LSHR T39.Y, T39.X, literal.y,
4025; EG-NEXT:     AND_INT * T39.X, T39.X, literal.z,
4026; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4027; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4028; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
4029; EG-NEXT:     LSHR * T35.W, T38.W, literal.y,
4030; EG-NEXT:    64(8.968310e-44), 16(2.242078e-44)
4031; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
4032; EG-NEXT:     AND_INT * T35.Z, T38.W, literal.y,
4033; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
4034; EG-NEXT:    ALU clause starting at 96:
4035; EG-NEXT:     LSHR T35.Y, T38.Z, literal.x,
4036; EG-NEXT:     LSHR * T38.W, T38.Y, literal.x,
4037; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4038; EG-NEXT:     AND_INT T35.X, T38.Z, literal.x,
4039; EG-NEXT:     AND_INT T38.Z, T38.Y, literal.x,
4040; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4041; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
4042; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4043; EG-NEXT:     LSHR T38.Y, T38.X, literal.y,
4044; EG-NEXT:     LSHR T54.W, T52.W, literal.y,
4045; EG-NEXT:     AND_INT * T38.X, T38.X, literal.z,
4046; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4047; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4048; EG-NEXT:     AND_INT T54.Z, T52.W, literal.x,
4049; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4050; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
4051; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
4052; EG-NEXT:     LSHR T54.Y, T52.Z, literal.y,
4053; EG-NEXT:     LSHR T52.W, T52.Y, literal.y,
4054; EG-NEXT:     AND_INT * T54.X, T52.Z, literal.z,
4055; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4056; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4057; EG-NEXT:     AND_INT T52.Z, T52.Y, literal.x,
4058; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4059; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
4060; EG-NEXT:     LSHR T56.X, PV.W, literal.x,
4061; EG-NEXT:     LSHR T52.Y, T52.X, literal.y,
4062; EG-NEXT:     LSHR T57.W, T51.W, literal.y,
4063; EG-NEXT:     AND_INT * T52.X, T52.X, literal.z,
4064; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4065; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4066; EG-NEXT:     AND_INT T57.Z, T51.W, literal.x,
4067; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4068; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
4069; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
4070; EG-NEXT:     LSHR T57.Y, T51.Z, literal.y,
4071; EG-NEXT:     LSHR T51.W, T51.Y, literal.y,
4072; EG-NEXT:     AND_INT * T57.X, T51.Z, literal.z,
4073; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4074; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4075; EG-NEXT:     AND_INT T51.Z, T51.Y, literal.x,
4076; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4077; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
4078; EG-NEXT:     LSHR T59.X, PV.W, literal.x,
4079; EG-NEXT:     LSHR T51.Y, T51.X, literal.y,
4080; EG-NEXT:     LSHR T60.W, T50.W, literal.y,
4081; EG-NEXT:     AND_INT * T51.X, T51.X, literal.z,
4082; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4083; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4084; EG-NEXT:     AND_INT T60.Z, T50.W, literal.x,
4085; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4086; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
4087; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
4088; EG-NEXT:     LSHR T60.Y, T50.Z, literal.y,
4089; EG-NEXT:     LSHR T50.W, T50.Y, literal.y,
4090; EG-NEXT:     AND_INT * T60.X, T50.Z, literal.z,
4091; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4092; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4093; EG-NEXT:     AND_INT T50.Z, T50.Y, literal.x,
4094; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4095; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
4096; EG-NEXT:     LSHR T62.X, PV.W, literal.x,
4097; EG-NEXT:     LSHR T50.Y, T50.X, literal.y,
4098; EG-NEXT:     LSHR T63.W, T49.W, literal.y,
4099; EG-NEXT:     AND_INT * T50.X, T50.X, literal.z,
4100; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4101; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4102; EG-NEXT:     AND_INT T63.Z, T49.W, literal.x,
4103; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4104; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
4105; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
4106; EG-NEXT:     LSHR T63.Y, T49.Z, literal.y,
4107; EG-NEXT:     LSHR T49.W, T49.Y, literal.y,
4108; EG-NEXT:     AND_INT * T63.X, T49.Z, literal.z,
4109; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4110; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4111; EG-NEXT:     AND_INT T49.Z, T49.Y, literal.x,
4112; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4113; EG-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
4114; EG-NEXT:     LSHR T65.X, PV.W, literal.x,
4115; EG-NEXT:     LSHR T49.Y, T49.X, literal.y,
4116; EG-NEXT:     AND_INT * T49.X, T49.X, literal.z,
4117; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4118; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4119; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4120; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4121; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
4122; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4123;
4124; CM-LABEL: global_zextload_v64i16_to_v64i32:
4125; CM:       ; %bb.0:
4126; CM-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
4127; CM-NEXT:    TEX 3 @22
4128; CM-NEXT:    ALU 50, @39, KC0[CB0:0-32], KC1[]
4129; CM-NEXT:    TEX 3 @30
4130; CM-NEXT:    ALU 78, @90, KC0[CB0:0-32], KC1[]
4131; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4132; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T63, T48.X
4133; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T62, T64.X
4134; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T60, T49.X
4135; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T59, T61.X
4136; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T57, T50.X
4137; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T56, T58.X
4138; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T54, T51.X
4139; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T53, T55.X
4140; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T37.X
4141; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T47, T52.X
4142; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T38.X
4143; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T46.X
4144; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T39.X
4145; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T43.X
4146; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T36.X
4147; CM-NEXT:    CF_END
4148; CM-NEXT:    Fetch clause starting at 22:
4149; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 112, #1
4150; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 64, #1
4151; CM-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 80, #1
4152; CM-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 96, #1
4153; CM-NEXT:    Fetch clause starting at 30:
4154; CM-NEXT:     VTX_READ_128 T48.XYZW, T35.X, 0, #1
4155; CM-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 16, #1
4156; CM-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 32, #1
4157; CM-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 48, #1
4158; CM-NEXT:    ALU clause starting at 38:
4159; CM-NEXT:     MOV * T35.X, KC0[2].Z,
4160; CM-NEXT:    ALU clause starting at 39:
4161; CM-NEXT:     LSHR * T40.W, T36.Y, literal.x,
4162; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4163; CM-NEXT:     AND_INT * T40.Z, T36.Y, literal.x,
4164; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4165; CM-NEXT:     LSHR T40.Y, T36.X, literal.x,
4166; CM-NEXT:     LSHR * T41.W, T36.W, literal.x,
4167; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4168; CM-NEXT:     AND_INT T40.X, T36.X, literal.x,
4169; CM-NEXT:     AND_INT T41.Z, T36.W, literal.x,
4170; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4171; CM-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
4172; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
4173; CM-NEXT:     LSHR T41.Y, T36.Z, literal.y,
4174; CM-NEXT:     LSHR * T42.W, T39.Y, literal.y,
4175; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4176; CM-NEXT:     AND_INT T41.X, T36.Z, literal.x,
4177; CM-NEXT:     AND_INT T42.Z, T39.Y, literal.x,
4178; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4179; CM-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
4180; CM-NEXT:     LSHR T43.X, PV.W, literal.x,
4181; CM-NEXT:     LSHR T42.Y, T39.X, literal.y,
4182; CM-NEXT:     LSHR * T44.W, T39.W, literal.y,
4183; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4184; CM-NEXT:     AND_INT T42.X, T39.X, literal.x,
4185; CM-NEXT:     AND_INT T44.Z, T39.W, literal.x,
4186; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4187; CM-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
4188; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
4189; CM-NEXT:     LSHR T44.Y, T39.Z, literal.y,
4190; CM-NEXT:     LSHR * T45.W, T38.Y, literal.y,
4191; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4192; CM-NEXT:     AND_INT T44.X, T39.Z, literal.x,
4193; CM-NEXT:     AND_INT T45.Z, T38.Y, literal.x,
4194; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4195; CM-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
4196; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
4197; CM-NEXT:     LSHR T45.Y, T38.X, literal.y,
4198; CM-NEXT:     LSHR * T47.W, T38.W, literal.y,
4199; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4200; CM-NEXT:     AND_INT T45.X, T38.X, literal.x,
4201; CM-NEXT:     AND_INT T47.Z, T38.W, literal.x,
4202; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4203; CM-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
4204; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
4205; CM-NEXT:     LSHR T47.Y, T38.Z, literal.y,
4206; CM-NEXT:     LSHR * T35.W, T37.Y, literal.y,
4207; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4208; CM-NEXT:     AND_INT T47.X, T38.Z, literal.x,
4209; CM-NEXT:     AND_INT T35.Z, T37.Y, literal.x,
4210; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4211; CM-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
4212; CM-NEXT:    ALU clause starting at 90:
4213; CM-NEXT:     LSHR T52.X, T0.W, literal.x,
4214; CM-NEXT:     LSHR T35.Y, T37.X, literal.y,
4215; CM-NEXT:     LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212
4216; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4217; CM-NEXT:     AND_INT T35.X, T37.X, literal.x,
4218; CM-NEXT:     AND_INT T53.Z, T37.W, literal.x,
4219; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4220; CM-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
4221; CM-NEXT:     LSHR T37.X, PV.W, literal.x,
4222; CM-NEXT:     LSHR T53.Y, T37.Z, literal.y,
4223; CM-NEXT:     LSHR * T54.W, T51.Y, literal.y,
4224; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4225; CM-NEXT:     AND_INT T53.X, T37.Z, literal.x,
4226; CM-NEXT:     AND_INT T54.Z, T51.Y, literal.x,
4227; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4228; CM-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
4229; CM-NEXT:     LSHR T55.X, PV.W, literal.x,
4230; CM-NEXT:     LSHR T54.Y, T51.X, literal.y,
4231; CM-NEXT:     LSHR * T56.W, T51.W, literal.y,
4232; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4233; CM-NEXT:     AND_INT T54.X, T51.X, literal.x,
4234; CM-NEXT:     AND_INT T56.Z, T51.W, literal.x,
4235; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4236; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
4237; CM-NEXT:     LSHR T51.X, PV.W, literal.x,
4238; CM-NEXT:     LSHR T56.Y, T51.Z, literal.y,
4239; CM-NEXT:     LSHR * T57.W, T50.Y, literal.y,
4240; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4241; CM-NEXT:     AND_INT T56.X, T51.Z, literal.x,
4242; CM-NEXT:     AND_INT T57.Z, T50.Y, literal.x,
4243; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4244; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
4245; CM-NEXT:     LSHR T58.X, PV.W, literal.x,
4246; CM-NEXT:     LSHR T57.Y, T50.X, literal.y,
4247; CM-NEXT:     LSHR * T59.W, T50.W, literal.y,
4248; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4249; CM-NEXT:     AND_INT T57.X, T50.X, literal.x,
4250; CM-NEXT:     AND_INT T59.Z, T50.W, literal.x,
4251; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4252; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
4253; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
4254; CM-NEXT:     LSHR T59.Y, T50.Z, literal.y,
4255; CM-NEXT:     LSHR * T60.W, T49.Y, literal.y,
4256; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4257; CM-NEXT:     AND_INT T59.X, T50.Z, literal.x,
4258; CM-NEXT:     AND_INT T60.Z, T49.Y, literal.x,
4259; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4260; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
4261; CM-NEXT:     LSHR T61.X, PV.W, literal.x,
4262; CM-NEXT:     LSHR T60.Y, T49.X, literal.y,
4263; CM-NEXT:     LSHR * T62.W, T49.W, literal.y,
4264; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4265; CM-NEXT:     AND_INT T60.X, T49.X, literal.x,
4266; CM-NEXT:     AND_INT T62.Z, T49.W, literal.x,
4267; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4268; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
4269; CM-NEXT:     LSHR T49.X, PV.W, literal.x,
4270; CM-NEXT:     LSHR T62.Y, T49.Z, literal.y,
4271; CM-NEXT:     LSHR * T63.W, T48.Y, literal.y,
4272; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4273; CM-NEXT:     AND_INT T62.X, T49.Z, literal.x,
4274; CM-NEXT:     AND_INT T63.Z, T48.Y, literal.x,
4275; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4276; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
4277; CM-NEXT:     LSHR T64.X, PV.W, literal.x,
4278; CM-NEXT:     LSHR T63.Y, T48.X, literal.y,
4279; CM-NEXT:     LSHR * T65.W, T48.W, literal.y,
4280; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4281; CM-NEXT:     AND_INT T63.X, T48.X, literal.x,
4282; CM-NEXT:     AND_INT * T65.Z, T48.W, literal.x,
4283; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4284; CM-NEXT:     LSHR T48.X, KC0[2].Y, literal.x,
4285; CM-NEXT:     LSHR * T65.Y, T48.Z, literal.y,
4286; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4287; CM-NEXT:     AND_INT T65.X, T48.Z, literal.x,
4288; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4289; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4290; CM-NEXT:     LSHR * T66.X, PV.W, literal.x,
4291; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4292  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
4293  %ext = zext <64 x i16> %load to <64 x i32>
4294  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
4295  ret void
4296}
4297
4298define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
4299; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32:
4300; GCN-NOHSA-SI:       ; %bb.0:
4301; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
4302; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
4303; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
4304; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xe8f000
4305; GCN-NOHSA-SI-NEXT:    s_add_u32 s8, s8, s3
4306; GCN-NOHSA-SI-NEXT:    s_addc_u32 s9, s9, 0
4307; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
4308; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4309; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4310; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4311; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
4312; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
4313; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s6
4314; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s7
4315; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s2
4316; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, s3
4317; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:112
4318; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:96
4319; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4320; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:64
4321; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0
4322; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:16
4323; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:32
4324; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:48
4325; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
4326; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v11
4327; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v10
4328; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v11, 0, 16
4329; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v10, 0, 16
4330; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
4331; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4332; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
4333; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
4334; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill
4335; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v9
4336; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v8
4337; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v9, 0, 16
4338; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v8, 0, 16
4339; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v31
4340; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v30
4341; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v31, 0, 16
4342; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v30, 0, 16
4343; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v43, 16, v29
4344; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v41, 16, v28
4345; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v42, v29, 0, 16
4346; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v40, v28, 0, 16
4347; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v31, 16, v35
4348; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v29, 16, v34
4349; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v30, v35, 0, 16
4350; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v28, v34, 0, 16
4351; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v47, 16, v33
4352; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v45, 16, v32
4353; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v46, v33, 0, 16
4354; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v44, v32, 0, 16
4355; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v35, 16, v39
4356; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v33, 16, v38
4357; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v34, v39, 0, 16
4358; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v32, v38, 0, 16
4359; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v51, 16, v37
4360; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v49, 16, v36
4361; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v50, v37, 0, 16
4362; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v48, v36, 0, 16
4363; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v39, 16, v27
4364; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v37, 16, v26
4365; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v38, v27, 0, 16
4366; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v36, v26, 0, 16
4367; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v55, 16, v25
4368; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v53, 16, v24
4369; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v54, v25, 0, 16
4370; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v52, v24, 0, 16
4371; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v26, 16, v23
4372; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v24, 16, v22
4373; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v25, v23, 0, 16
4374; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v23, v22, 0, 16
4375; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v59, 16, v21
4376; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v57, 16, v20
4377; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v58, v21, 0, 16
4378; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v56, v20, 0, 16
4379; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v22, 16, v19
4380; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 16, v18
4381; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v21, v19, 0, 16
4382; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v18, 0, 16
4383; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v63, 16, v17
4384; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v61, 16, v16
4385; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v62, v17, 0, 16
4386; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v60, v16, 0, 16
4387; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 16, v15
4388; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 16, v14
4389; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v15, 0, 16
4390; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v14, 0, 16
4391; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4392; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v13
4393; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v12
4394; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v13, 0, 16
4395; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v12, 0, 16
4396; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4397; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:240
4398; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
4399; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:208
4400; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
4401; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:176
4402; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
4403; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:144
4404; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
4405; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:112
4406; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
4407; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80
4408; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32
4409; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
4410; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
4411; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
4412; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
4413; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
4414; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Reload
4415; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4416; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4417; GCN-NOHSA-SI-NEXT:    s_endpgm
4418;
4419; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32:
4420; GCN-HSA:       ; %bb.0:
4421; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4422; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4423; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4424; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4425; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[0:1]
4426; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x70
4427; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4428; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4429; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4430; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[0:1]
4431; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x60
4432; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4433; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4434; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4435; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
4436; GCN-HSA-NEXT:    s_add_u32 s4, s2, 0x50
4437; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4438; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4439; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4440; GCN-HSA-NEXT:    s_add_u32 s4, s2, 64
4441; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4442; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4443; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s5
4444; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s4
4445; GCN-HSA-NEXT:    flat_load_dwordx4 v[16:19], v[16:17]
4446; GCN-HSA-NEXT:    s_add_u32 s4, s2, 48
4447; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4448; GCN-HSA-NEXT:    s_add_u32 s6, s2, 32
4449; GCN-HSA-NEXT:    s_addc_u32 s7, s3, 0
4450; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
4451; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s5
4452; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
4453; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s4
4454; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s7
4455; GCN-HSA-NEXT:    flat_load_dwordx4 v[20:23], v[20:21]
4456; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s6
4457; GCN-HSA-NEXT:    v_mov_b32_e32 v37, s1
4458; GCN-HSA-NEXT:    v_mov_b32_e32 v36, s0
4459; GCN-HSA-NEXT:    s_waitcnt vmcnt(5)
4460; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v27, 16, v13
4461; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v25, 16, v12
4462; GCN-HSA-NEXT:    v_bfe_i32 v26, v13, 0, 16
4463; GCN-HSA-NEXT:    v_bfe_i32 v24, v12, 0, 16
4464; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4465; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4466; GCN-HSA-NEXT:    flat_load_dwordx4 v[28:31], v[12:13]
4467; GCN-HSA-NEXT:    flat_load_dwordx4 v[32:35], v[32:33]
4468; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
4469; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4470; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4471; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4472; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
4473; GCN-HSA-NEXT:    flat_store_dwordx4 v[36:37], v[24:27]
4474; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4475; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v27, 16, v15
4476; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v25, 16, v14
4477; GCN-HSA-NEXT:    v_bfe_i32 v26, v15, 0, 16
4478; GCN-HSA-NEXT:    v_bfe_i32 v24, v14, 0, 16
4479; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[24:27]
4480; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
4481; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 16, v9
4482; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 16, v8
4483; GCN-HSA-NEXT:    v_bfe_i32 v14, v9, 0, 16
4484; GCN-HSA-NEXT:    v_bfe_i32 v12, v8, 0, 16
4485; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
4486; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
4487; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
4488; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4489; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[12:15]
4490; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
4491; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
4492; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
4493; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4494; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
4495; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
4496; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
4497; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v14, 16, v11
4498; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v12, 16, v10
4499; GCN-HSA-NEXT:    v_bfe_i32 v13, v11, 0, 16
4500; GCN-HSA-NEXT:    v_bfe_i32 v11, v10, 0, 16
4501; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4502; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[11:14]
4503; GCN-HSA-NEXT:    s_waitcnt vmcnt(9)
4504; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v4
4505; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4506; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4507; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
4508; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4509; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
4510; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
4511; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
4512; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v5
4513; GCN-HSA-NEXT:    v_bfe_i32 v10, v5, 0, 16
4514; GCN-HSA-NEXT:    v_bfe_i32 v8, v4, 0, 16
4515; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4516; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[8:11]
4517; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
4518; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v10, 16, v7
4519; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 16, v6
4520; GCN-HSA-NEXT:    v_bfe_i32 v9, v7, 0, 16
4521; GCN-HSA-NEXT:    v_bfe_i32 v7, v6, 0, 16
4522; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
4523; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
4524; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[7:10]
4525; GCN-HSA-NEXT:    s_waitcnt vmcnt(10)
4526; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
4527; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
4528; GCN-HSA-NEXT:    v_bfe_i32 v6, v1, 0, 16
4529; GCN-HSA-NEXT:    v_bfe_i32 v4, v0, 0, 16
4530; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4531; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
4532; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
4533; GCN-HSA-NEXT:    v_bfe_i32 v10, v3, 0, 16
4534; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
4535; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
4536; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[8:11]
4537; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4538; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4539; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
4540; GCN-HSA-NEXT:    s_waitcnt vmcnt(11)
4541; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v17
4542; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v16
4543; GCN-HSA-NEXT:    v_bfe_i32 v2, v17, 0, 16
4544; GCN-HSA-NEXT:    v_bfe_i32 v0, v16, 0, 16
4545; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4546; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4547; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4548; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4549; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
4550; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v19
4551; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v18
4552; GCN-HSA-NEXT:    v_bfe_i32 v2, v19, 0, 16
4553; GCN-HSA-NEXT:    v_bfe_i32 v0, v18, 0, 16
4554; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4555; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4556; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4557; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4558; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
4559; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4560; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4561; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4562; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
4563; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4564; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
4565; GCN-HSA-NEXT:    s_waitcnt vmcnt(12)
4566; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v23
4567; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v22
4568; GCN-HSA-NEXT:    v_bfe_i32 v10, v23, 0, 16
4569; GCN-HSA-NEXT:    v_bfe_i32 v8, v22, 0, 16
4570; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
4571; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
4572; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
4573; GCN-HSA-NEXT:    s_waitcnt vmcnt(11)
4574; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 16, v33
4575; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 16, v32
4576; GCN-HSA-NEXT:    v_bfe_i32 v14, v33, 0, 16
4577; GCN-HSA-NEXT:    v_bfe_i32 v12, v32, 0, 16
4578; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4579; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
4580; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v35
4581; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4582; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4583; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
4584; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v34
4585; GCN-HSA-NEXT:    v_bfe_i32 v10, v35, 0, 16
4586; GCN-HSA-NEXT:    v_bfe_i32 v8, v34, 0, 16
4587; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4588; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v21
4589; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v20
4590; GCN-HSA-NEXT:    v_bfe_i32 v2, v21, 0, 16
4591; GCN-HSA-NEXT:    v_bfe_i32 v0, v20, 0, 16
4592; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
4593; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
4594; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
4595; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4596; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v29
4597; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v28
4598; GCN-HSA-NEXT:    v_bfe_i32 v6, v29, 0, 16
4599; GCN-HSA-NEXT:    v_bfe_i32 v4, v28, 0, 16
4600; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
4601; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
4602; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
4603; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v31
4604; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4605; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v30
4606; GCN-HSA-NEXT:    v_bfe_i32 v2, v31, 0, 16
4607; GCN-HSA-NEXT:    v_bfe_i32 v0, v30, 0, 16
4608; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4609; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4610; GCN-HSA-NEXT:    s_endpgm
4611;
4612; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32:
4613; GCN-NOHSA-VI:       ; %bb.0:
4614; GCN-NOHSA-VI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
4615; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4616; GCN-NOHSA-VI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
4617; GCN-NOHSA-VI-NEXT:    s_mov_b32 s90, -1
4618; GCN-NOHSA-VI-NEXT:    s_mov_b32 s91, 0xe80000
4619; GCN-NOHSA-VI-NEXT:    s_add_u32 s88, s88, s3
4620; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4621; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4622; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4623; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4624; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4625; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
4626; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
4627; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
4628; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
4629; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:112
4630; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:96
4631; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:80
4632; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:64
4633; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0
4634; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:16
4635; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:32
4636; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:48
4637; GCN-NOHSA-VI-NEXT:    s_addc_u32 s89, s89, 0
4638; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
4639; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v63, 16, v13
4640; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
4641; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v59, 16, v17
4642; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
4643; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v55, 16, v21
4644; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
4645; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v27
4646; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v26
4647; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v27, 0, 16
4648; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v26, 0, 16
4649; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
4650; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4651; GCN-NOHSA-VI-NEXT:    buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
4652; GCN-NOHSA-VI-NEXT:    buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
4653; GCN-NOHSA-VI-NEXT:    buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
4654; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v9
4655; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v8
4656; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v9, 0, 16
4657; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v8, 0, 16
4658; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v25
4659; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v24
4660; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v25, 0, 16
4661; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v24, 0, 16
4662; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v27, 16, v31
4663; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v25, 16, v30
4664; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v26, v31, 0, 16
4665; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v24, v30, 0, 16
4666; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v43, 16, v29
4667; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v41, 16, v28
4668; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v42, v29, 0, 16
4669; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v40, v28, 0, 16
4670; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v31, 16, v35
4671; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v29, 16, v34
4672; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v30, v35, 0, 16
4673; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v28, v34, 0, 16
4674; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v47, 16, v33
4675; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v45, 16, v32
4676; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v46, v33, 0, 16
4677; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v44, v32, 0, 16
4678; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v35, 16, v39
4679; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v33, 16, v38
4680; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v34, v39, 0, 16
4681; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v32, v38, 0, 16
4682; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v51, 16, v37
4683; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v49, 16, v36
4684; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v50, v37, 0, 16
4685; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v48, v36, 0, 16
4686; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v39, 16, v23
4687; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v37, 16, v22
4688; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v38, v23, 0, 16
4689; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v36, v22, 0, 16
4690; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v53, 16, v20
4691; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v54, v21, 0, 16
4692; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v52, v20, 0, 16
4693; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v22, 16, v19
4694; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v20, 16, v18
4695; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v21, v19, 0, 16
4696; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v19, v18, 0, 16
4697; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v57, 16, v16
4698; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v58, v17, 0, 16
4699; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v56, v16, 0, 16
4700; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v18, 16, v15
4701; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 16, v14
4702; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v17, v15, 0, 16
4703; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v14, 0, 16
4704; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v61, 16, v12
4705; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v62, v13, 0, 16
4706; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v60, v12, 0, 16
4707; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 16, v11
4708; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 16, v10
4709; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v11, 0, 16
4710; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v10, 0, 16
4711; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4712; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:240
4713; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
4714; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208
4715; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
4716; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:176
4717; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
4718; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:144
4719; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
4720; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:112
4721; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
4722; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80
4723; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32
4724; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
4725; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
4726; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
4727; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
4728; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
4729; GCN-NOHSA-VI-NEXT:    buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
4730; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4731; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4732; GCN-NOHSA-VI-NEXT:    s_endpgm
4733;
4734; EG-LABEL: global_sextload_v64i16_to_v64i32:
4735; EG:       ; %bb.0:
4736; EG-NEXT:    ALU 18, @38, KC0[CB0:0-32], KC1[]
4737; EG-NEXT:    TEX 7 @22
4738; EG-NEXT:    ALU 75, @57, KC0[CB0:0-32], KC1[]
4739; EG-NEXT:    ALU 71, @133, KC0[CB0:0-32], KC1[]
4740; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0
4741; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
4742; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0
4743; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0
4744; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0
4745; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0
4746; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0
4747; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0
4748; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0
4749; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0
4750; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0
4751; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0
4752; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0
4753; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4754; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4755; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4756; EG-NEXT:    CF_END
4757; EG-NEXT:    PAD
4758; EG-NEXT:    Fetch clause starting at 22:
4759; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
4760; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
4761; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
4762; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
4763; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
4764; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
4765; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
4766; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
4767; EG-NEXT:    ALU clause starting at 38:
4768; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4769; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4770; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
4771; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.x,
4772; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4773; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4774; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
4775; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
4776; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4777; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4778; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
4779; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4780; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4781; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4782; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4783; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4784; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4785; EG-NEXT:     MOV * T41.X, KC0[2].Z,
4786; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4787; EG-NEXT:    ALU clause starting at 57:
4788; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4789; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
4790; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4791; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4792; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
4793; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4794; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4795; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
4796; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4797; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4798; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
4799; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4800; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4801; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
4802; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4803; EG-NEXT:     LSHR T0.Y, T41.Y, literal.y,
4804; EG-NEXT:     LSHR T0.Z, T41.W, literal.y,
4805; EG-NEXT:     LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212
4806; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4807; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4808; EG-NEXT:    160(2.242078e-43), 0(0.000000e+00)
4809; EG-NEXT:     LSHR T54.X, PS, literal.x,
4810; EG-NEXT:     LSHR T1.Y, T48.W, literal.y,
4811; EG-NEXT:     LSHR T1.Z, T47.Y, literal.y,
4812; EG-NEXT:     LSHR T1.W, T47.W, literal.y, BS:VEC_120/SCL_212
4813; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
4814; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4815; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
4816; EG-NEXT:     LSHR T55.X, PS, literal.x,
4817; EG-NEXT:     LSHR T2.Y, T46.Y, literal.y,
4818; EG-NEXT:     LSHR T2.Z, T46.W, literal.y,
4819; EG-NEXT:     LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212
4820; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
4821; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4822; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4823; EG-NEXT:     LSHR T56.X, PS, literal.x,
4824; EG-NEXT:     LSHR T3.Y, T45.W, literal.y,
4825; EG-NEXT:     BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212
4826; EG-NEXT:     LSHR T3.W, T43.Y, literal.y,
4827; EG-NEXT:     LSHR * T4.W, T43.W, literal.y,
4828; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4829; EG-NEXT:     BFE_INT T57.X, T44.Z, 0.0, literal.x,
4830; EG-NEXT:     LSHR T4.Y, T42.Y, literal.x,
4831; EG-NEXT:     BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4832; EG-NEXT:     LSHR T5.W, T42.W, literal.x,
4833; EG-NEXT:     LSHR * T6.W, T44.W, literal.x,
4834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4835; EG-NEXT:     BFE_INT T58.X, T44.X, 0.0, literal.x,
4836; EG-NEXT:     LSHR T5.Y, T44.Y, literal.x,
4837; EG-NEXT:     BFE_INT T59.Z, T42.W, 0.0, literal.x,
4838; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
4839; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
4840; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4841; EG-NEXT:     BFE_INT T59.X, T42.Z, 0.0, literal.x,
4842; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
4843; EG-NEXT:     BFE_INT T44.Z, T42.Y, 0.0, literal.x,
4844; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
4845; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
4846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4847; EG-NEXT:     BFE_INT T44.X, T42.X, 0.0, literal.x,
4848; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
4849; EG-NEXT:     BFE_INT T60.Z, T43.W, 0.0, literal.x,
4850; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212
4851; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
4852; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4853; EG-NEXT:     BFE_INT T60.X, T43.Z, 0.0, literal.x,
4854; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
4855; EG-NEXT:     BFE_INT T42.Z, T43.Y, 0.0, literal.x,
4856; EG-NEXT:     BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4857; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
4858; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4859; EG-NEXT:     BFE_INT T42.X, T43.X, 0.0, literal.x,
4860; EG-NEXT:     BFE_INT T44.Y, PS, 0.0, literal.x,
4861; EG-NEXT:     BFE_INT T61.Z, T45.W, 0.0, literal.x,
4862; EG-NEXT:     BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212
4863; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4864; EG-NEXT:    ALU clause starting at 133:
4865; EG-NEXT:     LSHR * T4.W, T43.Z, literal.x,
4866; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4867; EG-NEXT:     BFE_INT T61.X, T45.Z, 0.0, literal.x,
4868; EG-NEXT:     BFE_INT T60.Y, PV.W, 0.0, literal.x,
4869; EG-NEXT:     BFE_INT T43.Z, T45.Y, 0.0, literal.x,
4870; EG-NEXT:     BFE_INT T42.W, T3.W, 0.0, literal.x,
4871; EG-NEXT:     LSHR * T3.W, T43.X, literal.x,
4872; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4873; EG-NEXT:     BFE_INT T43.X, T45.X, 0.0, literal.x,
4874; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4875; EG-NEXT:     BFE_INT T62.Z, T46.W, 0.0, literal.x,
4876; EG-NEXT:     BFE_INT T61.W, T3.Y, 0.0, literal.x,
4877; EG-NEXT:     LSHR * T3.W, T45.Z, literal.x,
4878; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4879; EG-NEXT:     BFE_INT T62.X, T46.Z, 0.0, literal.x,
4880; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
4881; EG-NEXT:     BFE_INT T45.Z, T46.Y, 0.0, literal.x,
4882; EG-NEXT:     BFE_INT T43.W, T2.W, 0.0, literal.x,
4883; EG-NEXT:     LSHR * T2.W, T45.X, literal.x,
4884; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4885; EG-NEXT:     BFE_INT T45.X, T46.X, 0.0, literal.x,
4886; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4887; EG-NEXT:     BFE_INT T63.Z, T47.W, 0.0, literal.x,
4888; EG-NEXT:     BFE_INT T62.W, T2.Z, 0.0, literal.x,
4889; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
4890; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4891; EG-NEXT:     BFE_INT T63.X, T47.Z, 0.0, literal.x,
4892; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
4893; EG-NEXT:     BFE_INT T46.Z, T47.Y, 0.0, literal.x,
4894; EG-NEXT:     BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4895; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
4896; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4897; EG-NEXT:     BFE_INT T46.X, T47.X, 0.0, literal.x,
4898; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4899; EG-NEXT:     BFE_INT T64.Z, T48.W, 0.0, literal.x,
4900; EG-NEXT:     BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
4901; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
4902; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4903; EG-NEXT:     BFE_INT T64.X, T48.Z, 0.0, literal.x,
4904; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
4905; EG-NEXT:     BFE_INT T47.Z, T48.Y, 0.0, literal.x,
4906; EG-NEXT:     BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4907; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
4908; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4909; EG-NEXT:     BFE_INT T47.X, T48.X, 0.0, literal.x,
4910; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4911; EG-NEXT:     BFE_INT T65.Z, T41.W, 0.0, literal.x,
4912; EG-NEXT:     BFE_INT T64.W, T1.Y, 0.0, literal.x,
4913; EG-NEXT:     LSHR * T1.W, T48.Z, literal.x,
4914; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4915; EG-NEXT:     BFE_INT T65.X, T41.Z, 0.0, literal.x,
4916; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
4917; EG-NEXT:     BFE_INT T48.Z, T41.Y, 0.0, literal.x,
4918; EG-NEXT:     BFE_INT T47.W, T0.W, 0.0, literal.x,
4919; EG-NEXT:     LSHR * T0.W, T48.X, literal.x,
4920; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4921; EG-NEXT:     BFE_INT T48.X, T41.X, 0.0, literal.x,
4922; EG-NEXT:     BFE_INT T47.Y, PS, 0.0, literal.x,
4923; EG-NEXT:     LSHR T1.Z, T41.Z, literal.x,
4924; EG-NEXT:     BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4925; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4926; EG-NEXT:    16(2.242078e-44), 240(3.363116e-43)
4927; EG-NEXT:     LSHR T66.X, PS, literal.x,
4928; EG-NEXT:     BFE_INT T65.Y, PV.Z, 0.0, literal.y,
4929; EG-NEXT:     LSHR T0.Z, T41.X, literal.y,
4930; EG-NEXT:     BFE_INT T48.W, T0.Y, 0.0, literal.y,
4931; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4932; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4933; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4934; EG-NEXT:     LSHR T41.X, PS, literal.x,
4935; EG-NEXT:     BFE_INT * T48.Y, PV.Z, 0.0, literal.y,
4936; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4937;
4938; CM-LABEL: global_sextload_v64i16_to_v64i32:
4939; CM:       ; %bb.0:
4940; CM-NEXT:    ALU 0, @40, KC0[CB0:0-32], KC1[]
4941; CM-NEXT:    TEX 1 @24
4942; CM-NEXT:    ALU 15, @41, KC0[CB0:0-32], KC1[]
4943; CM-NEXT:    TEX 5 @28
4944; CM-NEXT:    ALU 82, @57, KC0[CB0:0-32], KC1[]
4945; CM-NEXT:    ALU 72, @140, KC0[CB0:0-32], KC1[]
4946; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4947; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T36.X
4948; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T64, T56.X
4949; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T37, T55.X
4950; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T63, T54.X
4951; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T53.X
4952; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T62, T52.X
4953; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T51.X
4954; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T61, T50.X
4955; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T43, T49.X
4956; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T60, T48.X
4957; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T47.X
4958; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T59, T46.X
4959; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T40.X
4960; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T58, T39.X
4961; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T57, T38.X
4962; CM-NEXT:    CF_END
4963; CM-NEXT:    PAD
4964; CM-NEXT:    Fetch clause starting at 24:
4965; CM-NEXT:     VTX_READ_128 T35.XYZW, T37.X, 16, #1
4966; CM-NEXT:     VTX_READ_128 T36.XYZW, T37.X, 0, #1
4967; CM-NEXT:    Fetch clause starting at 28:
4968; CM-NEXT:     VTX_READ_128 T41.XYZW, T37.X, 112, #1
4969; CM-NEXT:     VTX_READ_128 T42.XYZW, T37.X, 96, #1
4970; CM-NEXT:     VTX_READ_128 T43.XYZW, T37.X, 80, #1
4971; CM-NEXT:     VTX_READ_128 T44.XYZW, T37.X, 64, #1
4972; CM-NEXT:     VTX_READ_128 T45.XYZW, T37.X, 48, #1
4973; CM-NEXT:     VTX_READ_128 T37.XYZW, T37.X, 32, #1
4974; CM-NEXT:    ALU clause starting at 40:
4975; CM-NEXT:     MOV * T37.X, KC0[2].Z,
4976; CM-NEXT:    ALU clause starting at 41:
4977; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4978; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4979; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
4980; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4981; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
4982; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
4983; CM-NEXT:     LSHR T0.Y, T36.Z, literal.y,
4984; CM-NEXT:     LSHR T0.Z, T36.W, literal.y,
4985; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4986; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4987; CM-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4988; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
4989; CM-NEXT:     LSHR T1.Y, T36.Y, literal.y,
4990; CM-NEXT:     LSHR T1.Z, T35.Z, literal.y,
4991; CM-NEXT:     LSHR * T0.W, T35.W, literal.y,
4992; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4993; CM-NEXT:    ALU clause starting at 57:
4994; CM-NEXT:     LSHR T2.Z, T35.X, literal.x,
4995; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4996; CM-NEXT:    16(2.242078e-44), 208(2.914701e-43)
4997; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
4998; CM-NEXT:     LSHR T2.Y, T35.Y, literal.y,
4999; CM-NEXT:     LSHR T3.Z, T37.Z, literal.y,
5000; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5001; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5002; CM-NEXT:    160(2.242078e-43), 0(0.000000e+00)
5003; CM-NEXT:     LSHR T47.X, PV.W, literal.x,
5004; CM-NEXT:     LSHR T3.Y, T37.W, literal.y,
5005; CM-NEXT:     LSHR T4.Z, T37.X, literal.y,
5006; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5007; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5008; CM-NEXT:    176(2.466285e-43), 0(0.000000e+00)
5009; CM-NEXT:     LSHR T48.X, PV.W, literal.x,
5010; CM-NEXT:     LSHR T4.Y, T37.Y, literal.y,
5011; CM-NEXT:     LSHR T5.Z, T45.Z, literal.y,
5012; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5013; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5014; CM-NEXT:    128(1.793662e-43), 0(0.000000e+00)
5015; CM-NEXT:     LSHR T49.X, PV.W, literal.x,
5016; CM-NEXT:     LSHR T5.Y, T45.W, literal.y,
5017; CM-NEXT:     LSHR T6.Z, T45.X, literal.y,
5018; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5019; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5020; CM-NEXT:    144(2.017870e-43), 0(0.000000e+00)
5021; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
5022; CM-NEXT:     LSHR T6.Y, T45.Y, literal.y,
5023; CM-NEXT:     LSHR T7.Z, T44.Z, literal.y,
5024; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5025; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5026; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
5027; CM-NEXT:     LSHR T51.X, PV.W, literal.x,
5028; CM-NEXT:     LSHR T7.Y, T44.W, literal.y,
5029; CM-NEXT:     LSHR T8.Z, T44.X, literal.y,
5030; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5031; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5032; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
5033; CM-NEXT:     LSHR T52.X, PV.W, literal.x,
5034; CM-NEXT:     LSHR T8.Y, T44.Y, literal.y,
5035; CM-NEXT:     LSHR T9.Z, T43.Z, literal.y,
5036; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5037; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5038; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
5039; CM-NEXT:     LSHR T53.X, PV.W, literal.x,
5040; CM-NEXT:     LSHR T9.Y, T43.W, literal.y,
5041; CM-NEXT:     LSHR T10.Z, T43.X, literal.y,
5042; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5043; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5044; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
5045; CM-NEXT:     LSHR T54.X, PV.W, literal.x,
5046; CM-NEXT:     LSHR T10.Y, T43.Y, literal.y,
5047; CM-NEXT:     LSHR T11.Z, T42.Z, literal.y,
5048; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5049; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5050; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
5051; CM-NEXT:     LSHR T55.X, PV.W, literal.x,
5052; CM-NEXT:     LSHR T11.Y, T42.W, literal.y,
5053; CM-NEXT:     LSHR T12.Z, T42.X, literal.y,
5054; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5055; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5056; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
5057; CM-NEXT:     LSHR T56.X, PV.W, literal.x,
5058; CM-NEXT:     LSHR T12.Y, T42.Y, literal.y,
5059; CM-NEXT:     BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212
5060; CM-NEXT:     LSHR * T1.W, T41.Z, literal.y,
5061; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5062; CM-NEXT:     BFE_INT T57.X, T41.X, 0.0, literal.x,
5063; CM-NEXT:     LSHR T13.Y, T41.W, literal.x,
5064; CM-NEXT:     BFE_INT T58.Z, T41.W, 0.0, literal.x,
5065; CM-NEXT:     LSHR * T2.W, T41.Y, literal.x,
5066; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5067; CM-NEXT:     BFE_INT T58.X, T41.Z, 0.0, literal.x,
5068; CM-NEXT:     LSHR T14.Y, T41.X, literal.x,
5069; CM-NEXT:     BFE_INT T41.Z, T42.Y, 0.0, literal.x,
5070; CM-NEXT:     BFE_INT * T57.W, PV.W, 0.0, literal.x,
5071; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5072; CM-NEXT:     BFE_INT T41.X, T42.X, 0.0, literal.x,
5073; CM-NEXT:     BFE_INT T57.Y, PV.Y, 0.0, literal.x,
5074; CM-NEXT:     BFE_INT T59.Z, T42.W, 0.0, literal.x,
5075; CM-NEXT:     BFE_INT * T58.W, T13.Y, 0.0, literal.x,
5076; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5077; CM-NEXT:    ALU clause starting at 140:
5078; CM-NEXT:     BFE_INT T59.X, T42.Z, 0.0, literal.x,
5079; CM-NEXT:     BFE_INT T58.Y, T1.W, 0.0, literal.x,
5080; CM-NEXT:     BFE_INT T42.Z, T43.Y, 0.0, literal.x,
5081; CM-NEXT:     BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5082; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5083; CM-NEXT:     BFE_INT T42.X, T43.X, 0.0, literal.x,
5084; CM-NEXT:     BFE_INT T41.Y, T12.Z, 0.0, literal.x,
5085; CM-NEXT:     BFE_INT T60.Z, T43.W, 0.0, literal.x,
5086; CM-NEXT:     BFE_INT * T59.W, T11.Y, 0.0, literal.x,
5087; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5088; CM-NEXT:     BFE_INT T60.X, T43.Z, 0.0, literal.x,
5089; CM-NEXT:     BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5090; CM-NEXT:     BFE_INT T43.Z, T44.Y, 0.0, literal.x,
5091; CM-NEXT:     BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5092; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5093; CM-NEXT:     BFE_INT T43.X, T44.X, 0.0, literal.x,
5094; CM-NEXT:     BFE_INT T42.Y, T10.Z, 0.0, literal.x,
5095; CM-NEXT:     BFE_INT T61.Z, T44.W, 0.0, literal.x,
5096; CM-NEXT:     BFE_INT * T60.W, T9.Y, 0.0, literal.x,
5097; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5098; CM-NEXT:     BFE_INT T61.X, T44.Z, 0.0, literal.x,
5099; CM-NEXT:     BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5100; CM-NEXT:     BFE_INT T44.Z, T45.Y, 0.0, literal.x,
5101; CM-NEXT:     BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5102; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5103; CM-NEXT:     BFE_INT T44.X, T45.X, 0.0, literal.x,
5104; CM-NEXT:     BFE_INT T43.Y, T8.Z, 0.0, literal.x,
5105; CM-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
5106; CM-NEXT:     BFE_INT * T61.W, T7.Y, 0.0, literal.x,
5107; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5108; CM-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
5109; CM-NEXT:     BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5110; CM-NEXT:     BFE_INT T45.Z, T37.Y, 0.0, literal.x,
5111; CM-NEXT:     BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5112; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5113; CM-NEXT:     BFE_INT T45.X, T37.X, 0.0, literal.x,
5114; CM-NEXT:     BFE_INT T44.Y, T6.Z, 0.0, literal.x,
5115; CM-NEXT:     BFE_INT T63.Z, T37.W, 0.0, literal.x,
5116; CM-NEXT:     BFE_INT * T62.W, T5.Y, 0.0, literal.x,
5117; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5118; CM-NEXT:     BFE_INT T63.X, T37.Z, 0.0, literal.x,
5119; CM-NEXT:     BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5120; CM-NEXT:     BFE_INT T37.Z, T35.Y, 0.0, literal.x,
5121; CM-NEXT:     BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5122; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5123; CM-NEXT:     BFE_INT T37.X, T35.X, 0.0, literal.x,
5124; CM-NEXT:     BFE_INT T45.Y, T4.Z, 0.0, literal.x,
5125; CM-NEXT:     BFE_INT T64.Z, T35.W, 0.0, literal.x,
5126; CM-NEXT:     BFE_INT * T63.W, T3.Y, 0.0, literal.x,
5127; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5128; CM-NEXT:     BFE_INT T64.X, T35.Z, 0.0, literal.x,
5129; CM-NEXT:     BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5130; CM-NEXT:     BFE_INT T35.Z, T36.Y, 0.0, literal.x,
5131; CM-NEXT:     BFE_INT * T37.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5132; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5133; CM-NEXT:     BFE_INT T35.X, T36.X, 0.0, literal.x,
5134; CM-NEXT:     BFE_INT T37.Y, T2.Z, 0.0, literal.x,
5135; CM-NEXT:     BFE_INT T65.Z, T36.W, 0.0, literal.x,
5136; CM-NEXT:     BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
5137; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5138; CM-NEXT:     BFE_INT T65.X, T36.Z, 0.0, literal.x,
5139; CM-NEXT:     BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5140; CM-NEXT:     LSHR T1.Z, T36.X, literal.x,
5141; CM-NEXT:     BFE_INT * T35.W, T1.Y, 0.0, literal.x,
5142; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5143; CM-NEXT:     LSHR T36.X, KC0[2].Y, literal.x,
5144; CM-NEXT:     BFE_INT T35.Y, PV.Z, 0.0, literal.y,
5145; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.y,
5146; CM-NEXT:     BFE_INT * T65.W, T0.Z, 0.0, literal.y,
5147; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5148; CM-NEXT:     LSHR T66.X, PV.Z, literal.x,
5149; CM-NEXT:     BFE_INT * T65.Y, T0.Y, 0.0, literal.y,
5150; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5151  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
5152  %ext = sext <64 x i16> %load to <64 x i32>
5153  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
5154  ret void
5155}
5156
; Kernel under test: load a single i16 from the global (addrspace(1)) pointer
; %in, zero-extend it to i64 and store the 64-bit result through %out.
; In the expectations below every configuration produces the upper 32 bits as
; a constant zero (v_mov_b32 v1, 0 on the GCN runs, MOV T0.Y, 0.0 on the r600
; runs) next to an unsigned 16-bit load. The tonga (VI) run additionally
; expects a v_and_b32 with 0xffff on the loaded value before the store.
5157define amdgpu_kernel void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
5158; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64:
5159; GCN-NOHSA-SI:       ; %bb.0:
5160; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5161; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5162; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5163; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5164; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5165; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5166; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5167; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5168; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5169; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5170; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5172; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5173; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5174; GCN-NOHSA-SI-NEXT:    s_endpgm
5175;
5176; GCN-HSA-LABEL: global_zextload_i16_to_i64:
5177; GCN-HSA:       ; %bb.0:
5178; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5179; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5180; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5181; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5182; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5183; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5184; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5185; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5186; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5187; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5188; GCN-HSA-NEXT:    s_endpgm
5189;
5190; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64:
5191; GCN-NOHSA-VI:       ; %bb.0:
5192; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5193; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5194; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5195; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5196; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5197; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5198; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5199; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5200; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5201; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5202; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5203; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5204; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5205; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5206; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5207; GCN-NOHSA-VI-NEXT:    s_endpgm
5208;
5209; EG-LABEL: global_zextload_i16_to_i64:
5210; EG:       ; %bb.0:
5211; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5212; EG-NEXT:    TEX 0 @6
5213; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5214; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5215; EG-NEXT:    CF_END
5216; EG-NEXT:    PAD
5217; EG-NEXT:    Fetch clause starting at 6:
5218; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5219; EG-NEXT:    ALU clause starting at 8:
5220; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5221; EG-NEXT:    ALU clause starting at 9:
5222; EG-NEXT:     MOV * T0.Y, 0.0,
5223; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5224; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5225;
5226; CM-LABEL: global_zextload_i16_to_i64:
5227; CM:       ; %bb.0:
5228; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5229; CM-NEXT:    TEX 0 @6
5230; CM-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5231; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5232; CM-NEXT:    CF_END
5233; CM-NEXT:    PAD
5234; CM-NEXT:    Fetch clause starting at 6:
5235; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5236; CM-NEXT:    ALU clause starting at 8:
5237; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5238; CM-NEXT:    ALU clause starting at 9:
5239; CM-NEXT:     MOV * T0.Y, 0.0,
5240; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5241; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5242  %a = load i16, i16 addrspace(1)* %in
5243  %ext = zext i16 %a to i64
5244  store i64 %ext, i64 addrspace(1)* %out
5245  ret void
5246}
5247
5248; FIXME: Need to optimize this sequence to avoid extra bfe:
5249;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
5250;          t31: i64 = any_extend t28
5251;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
5252
5253; TODO: These could be expanded earlier using ASHR 15
; Kernel under test: load a single i16 from the global (addrspace(1)) pointer
; %in, sign-extend it to i64 and store the 64-bit result through %out.
; In the expectations below the SI and HSA runs use a signed 16-bit load
; (buffer_load_sshort / flat_load_sshort) and derive the upper 32 bits with an
; arithmetic shift right by 31. The tonga (VI) run instead expects an unsigned
; 16-bit load followed by v_bfe_i32 (offset 0, width 16) before the shift --
; presumably the "extra bfe" the FIXME preceding this function refers to.
; The r600 runs expect BFE_INT with width 16 followed by ASHR by 31.
5254define amdgpu_kernel void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
5255; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64:
5256; GCN-NOHSA-SI:       ; %bb.0:
5257; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5258; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5259; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5260; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5261; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5262; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5263; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5264; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5265; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5266; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5267; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5268; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5269; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5270; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5271; GCN-NOHSA-SI-NEXT:    s_endpgm
5272;
5273; GCN-HSA-LABEL: global_sextload_i16_to_i64:
5274; GCN-HSA:       ; %bb.0:
5275; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5276; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5277; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5278; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5279; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
5280; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5281; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5282; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5283; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5284; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5285; GCN-HSA-NEXT:    s_endpgm
5286;
5287; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64:
5288; GCN-NOHSA-VI:       ; %bb.0:
5289; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5290; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5291; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5292; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5293; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5294; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5295; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5296; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5297; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5298; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5299; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5300; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5301; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5302; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5303; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5304; GCN-NOHSA-VI-NEXT:    s_endpgm
5305;
5306; EG-LABEL: global_sextload_i16_to_i64:
5307; EG:       ; %bb.0:
5308; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5309; EG-NEXT:    TEX 0 @6
5310; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5311; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5312; EG-NEXT:    CF_END
5313; EG-NEXT:    PAD
5314; EG-NEXT:    Fetch clause starting at 6:
5315; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5316; EG-NEXT:    ALU clause starting at 8:
5317; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5318; EG-NEXT:    ALU clause starting at 9:
5319; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5320; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5321; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5322; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5323; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5324;
5325; CM-LABEL: global_sextload_i16_to_i64:
5326; CM:       ; %bb.0:
5327; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5328; CM-NEXT:    TEX 0 @6
5329; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5330; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5331; CM-NEXT:    CF_END
5332; CM-NEXT:    PAD
5333; CM-NEXT:    Fetch clause starting at 6:
5334; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5335; CM-NEXT:    ALU clause starting at 8:
5336; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5337; CM-NEXT:    ALU clause starting at 9:
5338; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
5339; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5340; CM-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
5341; CM-NEXT:     ASHR * T0.Y, PV.X, literal.y,
5342; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5343  %a = load i16, i16 addrspace(1)* %in
5344  %ext = sext i16 %a to i64
5345  store i64 %ext, i64 addrspace(1)* %out
5346  ret void
5347}
5348
; Kernel under test: load a one-element vector <1 x i16> from the global
; (addrspace(1)) pointer %in, zero-extend it to <1 x i64> and store it through
; %out.
; The expectations below consist of one unsigned 16-bit load, a constant-zero
; upper 32 bits (v_mov_b32 v1, 0 on the GCN runs, MOV T0.Y, 0.0 on the r600
; runs) and one 64-bit store. The tonga (VI) run additionally expects a
; v_and_b32 with 0xffff on the loaded value.
5349define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
5350; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64:
5351; GCN-NOHSA-SI:       ; %bb.0:
5352; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5353; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5354; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5355; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5356; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5357; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5358; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5359; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5360; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5361; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5362; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5363; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5364; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5365; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5366; GCN-NOHSA-SI-NEXT:    s_endpgm
5367;
5368; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64:
5369; GCN-HSA:       ; %bb.0:
5370; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5371; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5372; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5373; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5374; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5375; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5376; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5377; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5378; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5379; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5380; GCN-HSA-NEXT:    s_endpgm
5381;
5382; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64:
5383; GCN-NOHSA-VI:       ; %bb.0:
5384; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5385; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5386; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5387; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5388; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5389; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5390; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5391; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5392; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5393; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5394; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5395; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5396; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5397; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5398; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5399; GCN-NOHSA-VI-NEXT:    s_endpgm
5400;
5401; EG-LABEL: global_zextload_v1i16_to_v1i64:
5402; EG:       ; %bb.0:
5403; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5404; EG-NEXT:    TEX 0 @6
5405; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5406; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5407; EG-NEXT:    CF_END
5408; EG-NEXT:    PAD
5409; EG-NEXT:    Fetch clause starting at 6:
5410; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5411; EG-NEXT:    ALU clause starting at 8:
5412; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5413; EG-NEXT:    ALU clause starting at 9:
5414; EG-NEXT:     MOV * T0.Y, 0.0,
5415; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5416; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5417;
5418; CM-LABEL: global_zextload_v1i16_to_v1i64:
5419; CM:       ; %bb.0:
5420; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5421; CM-NEXT:    TEX 0 @6
5422; CM-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5423; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5424; CM-NEXT:    CF_END
5425; CM-NEXT:    PAD
5426; CM-NEXT:    Fetch clause starting at 6:
5427; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5428; CM-NEXT:    ALU clause starting at 8:
5429; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5430; CM-NEXT:    ALU clause starting at 9:
5431; CM-NEXT:     MOV * T0.Y, 0.0,
5432; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5433; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5434  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
5435  %ext = zext <1 x i16> %load to <1 x i64>
5436  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
5437  ret void
5438}
5439
5440; TODO: These could be expanded earlier using ASHR 15
; Kernel under test: load a one-element vector <1 x i16> from the global
; (addrspace(1)) pointer %in, sign-extend it to <1 x i64> and store it through
; %out.
; In the expectations below the SI and HSA runs use a signed 16-bit load and
; compute the upper 32 bits with an arithmetic shift right by 31. The tonga
; (VI) run expects an unsigned 16-bit load followed by v_bfe_i32 (offset 0,
; width 16) and then the same shift. The r600 runs expect BFE_INT with width
; 16 followed by ASHR by 31.
5441define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
5442; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64:
5443; GCN-NOHSA-SI:       ; %bb.0:
5444; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5445; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5446; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5447; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5448; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5449; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5450; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5451; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5452; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5453; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5454; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5455; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5456; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5457; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5458; GCN-NOHSA-SI-NEXT:    s_endpgm
5459;
5460; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64:
5461; GCN-HSA:       ; %bb.0:
5462; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5463; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5464; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5465; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5466; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
5467; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5468; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5469; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5470; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5471; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5472; GCN-HSA-NEXT:    s_endpgm
5473;
5474; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64:
5475; GCN-NOHSA-VI:       ; %bb.0:
5476; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5477; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5478; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5479; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5480; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5481; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5482; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5483; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5484; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5485; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5486; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5487; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5488; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5489; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5490; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5491; GCN-NOHSA-VI-NEXT:    s_endpgm
5492;
5493; EG-LABEL: global_sextload_v1i16_to_v1i64:
5494; EG:       ; %bb.0:
5495; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5496; EG-NEXT:    TEX 0 @6
5497; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5498; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5499; EG-NEXT:    CF_END
5500; EG-NEXT:    PAD
5501; EG-NEXT:    Fetch clause starting at 6:
5502; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5503; EG-NEXT:    ALU clause starting at 8:
5504; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5505; EG-NEXT:    ALU clause starting at 9:
5506; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5507; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5508; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5509; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5510; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5511;
5512; CM-LABEL: global_sextload_v1i16_to_v1i64:
5513; CM:       ; %bb.0:
5514; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5515; CM-NEXT:    TEX 0 @6
5516; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5517; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5518; CM-NEXT:    CF_END
5519; CM-NEXT:    PAD
5520; CM-NEXT:    Fetch clause starting at 6:
5521; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5522; CM-NEXT:    ALU clause starting at 8:
5523; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5524; CM-NEXT:    ALU clause starting at 9:
5525; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
5526; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5527; CM-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
5528; CM-NEXT:     ASHR * T0.Y, PV.X, literal.y,
5529; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5530  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
5531  %ext = sext <1 x i16> %load to <1 x i64>
5532  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
5533  ret void
5534}
5535
; Kernel under test: load <2 x i16> from the global (addrspace(1)) pointer
; %in, zero-extend each element to i64 and store the <2 x i64> result through
; %out.
; In the expectations below both elements arrive through a single 32-bit load
; (buffer_load_dword / flat_load_dword / VTX_READ_32). The second element is
; taken with a logical shift right by 16, the first with an AND against
; 0xffff (65535), and the two upper 32-bit halves are zero. The result is
; written with one 128-bit store on the GCN runs.
5536define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
5537; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64:
5538; GCN-NOHSA-SI:       ; %bb.0:
5539; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5540; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5541; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5542; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5543; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5544; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5545; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5546; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5547; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
5548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5549; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5550; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5551; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5552; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5553; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5555; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5556; GCN-NOHSA-SI-NEXT:    s_endpgm
5557;
5558; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64:
5559; GCN-HSA:       ; %bb.0:
5560; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5561; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5562; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5563; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5564; GCN-HSA-NEXT:    flat_load_dword v0, v[0:1]
5565; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5566; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5567; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5568; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5569; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5570; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5571; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5572; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5573; GCN-HSA-NEXT:    s_endpgm
5574;
5575; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64:
5576; GCN-NOHSA-VI:       ; %bb.0:
5577; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5578; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5579; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5580; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5581; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5582; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5583; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5584; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5585; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
5586; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5587; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5588; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5589; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5590; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5591; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5592; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5593; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5594; GCN-NOHSA-VI-NEXT:    s_endpgm
5595;
5596; EG-LABEL: global_zextload_v2i16_to_v2i64:
5597; EG:       ; %bb.0:
5598; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5599; EG-NEXT:    TEX 0 @6
5600; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
5601; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5602; EG-NEXT:    CF_END
5603; EG-NEXT:    PAD
5604; EG-NEXT:    Fetch clause starting at 6:
5605; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5606; EG-NEXT:    ALU clause starting at 8:
5607; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5608; EG-NEXT:    ALU clause starting at 9:
5609; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
5610; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5611; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
5612; EG-NEXT:     MOV T4.Y, 0.0,
5613; EG-NEXT:     MOV T4.W, 0.0,
5614; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5615; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
5616;
5617; CM-LABEL: global_zextload_v2i16_to_v2i64:
5618; CM:       ; %bb.0:
5619; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5620; CM-NEXT:    TEX 0 @6
5621; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
5622; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5623; CM-NEXT:    CF_END
5624; CM-NEXT:    PAD
5625; CM-NEXT:    Fetch clause starting at 6:
5626; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5627; CM-NEXT:    ALU clause starting at 8:
5628; CM-NEXT:     MOV * T4.X, KC0[2].Z,
5629; CM-NEXT:    ALU clause starting at 9:
5630; CM-NEXT:     LSHR * T4.Z, T4.X, literal.x,
5631; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5632; CM-NEXT:     AND_INT T4.X, T4.X, literal.x,
5633; CM-NEXT:     MOV T4.Y, 0.0,
5634; CM-NEXT:     MOV * T4.W, 0.0,
5635; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
5636; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
5637; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5638  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
5639  %ext = zext <2 x i16> %load to <2 x i64>
5640  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
5641  ret void
5642}
5643
; Kernel under test: load <2 x i16> from the global (addrspace(1)) pointer
; %in, sign-extend each element to i64 and store the <2 x i64> result through
; %out.
; In the expectations below both elements arrive through a single 32-bit load.
; On the GCN runs the second element is first moved down with a logical shift
; right by 16, each element is sign-extended from 16 bits with v_bfe_i32, and
; each upper 32-bit half comes from an arithmetic shift right by 31.
; On the r600 runs the second element is produced by ASHR by 16 of the loaded
; dword and its upper half by ASHR by 31 of that same dword; the first element
; uses BFE_INT with width 16 followed by ASHR by 31.
5644define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
5645; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64:
5646; GCN-NOHSA-SI:       ; %bb.0:
5647; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5648; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5649; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5650; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5651; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5652; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5653; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5654; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5655; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
5656; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5657; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5658; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5659; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5660; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5661; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5662; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5663; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5664; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5665; GCN-NOHSA-SI-NEXT:    s_endpgm
5666;
5667; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64:
5668; GCN-HSA:       ; %bb.0:
5669; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5670; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5671; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5672; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5673; GCN-HSA-NEXT:    flat_load_dword v0, v[0:1]
5674; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5675; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5676; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5677; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5678; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
5679; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
5680; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5681; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5682; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5683; GCN-HSA-NEXT:    s_endpgm
5684;
5685; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64:
5686; GCN-NOHSA-VI:       ; %bb.0:
5687; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5688; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5689; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5690; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5691; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5692; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5693; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5694; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5695; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[8:11], 0
5696; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5697; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5698; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5699; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
5700; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5701; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5702; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5703; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5704; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5705; GCN-NOHSA-VI-NEXT:    s_endpgm
5706;
5707; EG-LABEL: global_sextload_v2i16_to_v2i64:
5708; EG:       ; %bb.0:
5709; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5710; EG-NEXT:    TEX 0 @6
5711; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
5712; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5713; EG-NEXT:    CF_END
5714; EG-NEXT:    PAD
5715; EG-NEXT:    Fetch clause starting at 6:
5716; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5717; EG-NEXT:    ALU clause starting at 8:
5718; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5719; EG-NEXT:    ALU clause starting at 9:
5720; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
5721; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5722; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
5723; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5724; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
5725; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5726; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5727; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
5728; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5729;
5730; CM-LABEL: global_sextload_v2i16_to_v2i64:
5731; CM:       ; %bb.0:
5732; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5733; CM-NEXT:    TEX 0 @6
5734; CM-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
5735; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5736; CM-NEXT:    CF_END
5737; CM-NEXT:    PAD
5738; CM-NEXT:    Fetch clause starting at 6:
5739; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5740; CM-NEXT:    ALU clause starting at 8:
5741; CM-NEXT:     MOV * T4.X, KC0[2].Z,
5742; CM-NEXT:    ALU clause starting at 9:
5743; CM-NEXT:     ASHR * T4.W, T4.X, literal.x,
5744; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5745; CM-NEXT:     ASHR * T4.Z, T4.X, literal.x,
5746; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5747; CM-NEXT:     BFE_INT * T4.X, T4.X, 0.0, literal.x,
5748; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5749; CM-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
5750; CM-NEXT:     ASHR * T4.Y, PV.X, literal.y,
5751; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of this file); regenerate them instead of editing them by hand.
5752  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
5753  %ext = sext <2 x i16> %load to <2 x i64>
5754  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
5755  ret void
5756}
5757
; Test: zero-extending global load, <4 x i16> -> <4 x i64>.
; Loads a <4 x i16> vector from %in (addrspace(1)), zero-extends every lane to
; i64 and stores the <4 x i64> result to %out (addrspace(1)).
; The check lines in the body are autogenerated (see the note on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per llc invocation listed
; in the file header.
; In the expected GCN output the low 16-bit halves are obtained by masking with
; 0xffff, the high halves by a logical shift right by 16, and the upper dword
; of each i64 lane is a register holding 0. The result is written with two
; dwordx4 stores (byte offsets 0 and 16).
; NOTE(review): attribute group #0 is defined outside this chunk.
5758define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
5759; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64:
5760; GCN-NOHSA-SI:       ; %bb.0:
5761; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5762; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5763; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5764; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5765; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5766; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5767; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5768; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5769; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5770; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5771; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5772; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
5773; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v1
5774; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5775; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5776; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5777; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v9
5778; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v8
5779; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v8
5780; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v9
5781; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5782; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
5783; GCN-NOHSA-SI-NEXT:    s_endpgm
5784;
5785; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64:
5786; GCN-HSA:       ; %bb.0:
5787; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5788; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5789; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5790; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5791; GCN-HSA-NEXT:    flat_load_dwordx2 v[8:9], v[0:1]
5792; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5793; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5794; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5795; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
5796; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5797; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
5798; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
5799; GCN-HSA-NEXT:    v_mov_b32_e32 v5, v1
5800; GCN-HSA-NEXT:    v_mov_b32_e32 v7, v1
5801; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
5802; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5803; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v9
5804; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v9
5805; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v8
5806; GCN-HSA-NEXT:    v_and_b32_e32 v4, 0xffff, v8
5807; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
5808; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5809; GCN-HSA-NEXT:    s_endpgm
5810;
5811; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64:
5812; GCN-NOHSA-VI:       ; %bb.0:
5813; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5814; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5815; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5816; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5817; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5818; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5819; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5820; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5821; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5822; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5823; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5824; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5825; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5826; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, v1
5827; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, v1
5828; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5829; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v9
5830; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v9
5831; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v8
5832; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v8
5833; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5834; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
5835; GCN-NOHSA-VI-NEXT:    s_endpgm
5836;
5837; EG-LABEL: global_zextload_v4i16_to_v4i64:
5838; EG:       ; %bb.0:
5839; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5840; EG-NEXT:    TEX 0 @6
5841; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
5842; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
5843; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
5844; EG-NEXT:    CF_END
5845; EG-NEXT:    Fetch clause starting at 6:
5846; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5847; EG-NEXT:    ALU clause starting at 8:
5848; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5849; EG-NEXT:    ALU clause starting at 9:
5850; EG-NEXT:     MOV T2.X, T5.X,
5851; EG-NEXT:     MOV * T3.X, T5.Y,
5852; EG-NEXT:     MOV T0.Y, PV.X,
5853; EG-NEXT:     MOV * T0.Z, PS,
5854; EG-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
5855; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5856; EG-NEXT:     AND_INT T5.X, T0.Z, literal.x,
5857; EG-NEXT:     MOV T5.Y, 0.0,
5858; EG-NEXT:     LSHR T6.Z, T0.Y, literal.y,
5859; EG-NEXT:     AND_INT * T6.X, T0.Y, literal.x,
5860; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5861; EG-NEXT:     MOV T6.Y, 0.0,
5862; EG-NEXT:     MOV T5.W, 0.0,
5863; EG-NEXT:     MOV * T6.W, 0.0,
5864; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
5865; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5866; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5867; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
5868; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5869;
5870; CM-LABEL: global_zextload_v4i16_to_v4i64:
5871; CM:       ; %bb.0:
5872; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5873; CM-NEXT:    TEX 0 @6
5874; CM-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5875; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T8.X
5876; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6, T7.X
5877; CM-NEXT:    CF_END
5878; CM-NEXT:    Fetch clause starting at 6:
5879; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5880; CM-NEXT:    ALU clause starting at 8:
5881; CM-NEXT:     MOV * T5.X, KC0[2].Z,
5882; CM-NEXT:    ALU clause starting at 9:
5883; CM-NEXT:     MOV * T2.X, T5.X,
5884; CM-NEXT:     MOV * T3.X, T5.Y,
5885; CM-NEXT:     MOV T0.Y, PV.X,
5886; CM-NEXT:     MOV * T0.Z, T2.X,
5887; CM-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
5888; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5889; CM-NEXT:     AND_INT T5.X, T0.Z, literal.x,
5890; CM-NEXT:     MOV T5.Y, 0.0,
5891; CM-NEXT:     LSHR * T6.Z, T0.Y, literal.y,
5892; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5893; CM-NEXT:     AND_INT T6.X, T0.Y, literal.x,
5894; CM-NEXT:     MOV T6.Y, 0.0,
5895; CM-NEXT:     MOV * T5.W, 0.0,
5896; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
5897; CM-NEXT:     MOV * T6.W, 0.0,
5898; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5899; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5900; CM-NEXT:     LSHR * T7.X, PV.W, literal.x,
5901; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5902; CM-NEXT:     LSHR * T8.X, KC0[2].Y, literal.x,
5903; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, the zext, and one vector store.
5904  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
5905  %ext = zext <4 x i16> %load to <4 x i64>
5906  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
5907  ret void
5908}
5909
; Test: sign-extending global load, <4 x i16> -> <4 x i64>.
; Loads a <4 x i16> vector from %in (addrspace(1)), sign-extends every lane to
; i64 and stores the <4 x i64> result to %out (addrspace(1)).
; The check lines in the body are autogenerated (see the note on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per llc invocation listed
; in the file header.
; In the expected GCN output a lane's low dword comes from v_bfe_i32 (offset 0,
; width 16) and its high dword from an arithmetic shift right by 31 of that
; low dword. Where v_ashr_i64 appears, it is a 64-bit arithmetic shift right by
; 48 of the loaded register pair, which yields the sign-extended top lane
; directly.
; NOTE(review): attribute group #0 is defined outside this chunk.
5910define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
5911; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64:
5912; GCN-NOHSA-SI:       ; %bb.0:
5913; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5914; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5915; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5916; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5917; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5918; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5919; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5920; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5921; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5922; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5923; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5924; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5925; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v2
5926; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
5927; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5928; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[6:7], v[1:2], 48
5929; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v3, 0, 16
5930; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5931; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v5, 0, 16
5932; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5933; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5934; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5935; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5936; GCN-NOHSA-SI-NEXT:    s_endpgm
5937;
5938; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i64:
5939; GCN-HSA:       ; %bb.0:
5940; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5941; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5942; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5943; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5944; GCN-HSA-NEXT:    flat_load_dwordx2 v[1:2], v[0:1]
5945; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5946; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5947; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
5948; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
5949; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
5950; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
5951; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5952; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v2
5953; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
5954; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[1:2], 48
5955; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
5956; GCN-HSA-NEXT:    v_bfe_i32 v4, v3, 0, 16
5957; GCN-HSA-NEXT:    v_bfe_i32 v0, v1, 0, 16
5958; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5959; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5960; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5961; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5962; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
5963; GCN-HSA-NEXT:    s_endpgm
5964;
5965; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64:
5966; GCN-NOHSA-VI:       ; %bb.0:
5967; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
5968; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
5969; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
5970; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
5971; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
5972; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5973; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
5974; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
5975; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5976; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
5977; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
5978; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, v2
5980; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
5981; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
5982; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v4, 0, 16
5983; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v5, 0, 16
5984; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5985; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v3, 0, 16
5986; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5987; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
5988; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5989; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5990; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5991; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5992; GCN-NOHSA-VI-NEXT:    s_endpgm
5993;
5994; EG-LABEL: global_sextload_v4i16_to_v4i64:
5995; EG:       ; %bb.0:
5996; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5997; EG-NEXT:    TEX 0 @6
5998; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5999; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
6000; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
6001; EG-NEXT:    CF_END
6002; EG-NEXT:    Fetch clause starting at 6:
6003; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6004; EG-NEXT:    ALU clause starting at 8:
6005; EG-NEXT:     MOV * T5.X, KC0[2].Z,
6006; EG-NEXT:    ALU clause starting at 9:
6007; EG-NEXT:     MOV T2.X, T5.X,
6008; EG-NEXT:     MOV * T3.X, T5.Y,
6009; EG-NEXT:     MOV T0.Y, PS,
6010; EG-NEXT:     MOV * T0.Z, PV.X,
6011; EG-NEXT:     ASHR * T5.W, PV.Z, literal.x,
6012; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6013; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
6014; EG-NEXT:     ASHR T5.Z, T0.Z, literal.y,
6015; EG-NEXT:     ASHR * T7.W, T0.Y, literal.z,
6016; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6017; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6018; EG-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
6019; EG-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
6020; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6021; EG-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
6022; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
6023; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6024; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6025; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
6026; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6027; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6028;
6029; CM-LABEL: global_sextload_v4i16_to_v4i64:
6030; CM:       ; %bb.0:
6031; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
6032; CM-NEXT:    TEX 0 @6
6033; CM-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
6034; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
6035; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
6036; CM-NEXT:    CF_END
6037; CM-NEXT:    Fetch clause starting at 6:
6038; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6039; CM-NEXT:    ALU clause starting at 8:
6040; CM-NEXT:     MOV * T5.X, KC0[2].Z,
6041; CM-NEXT:    ALU clause starting at 9:
6042; CM-NEXT:     MOV * T2.X, T5.X,
6043; CM-NEXT:     MOV T3.X, T5.Y,
6044; CM-NEXT:     MOV * T0.Y, PV.X,
6045; CM-NEXT:     MOV * T0.Z, PV.X,
6046; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.x,
6047; CM-NEXT:     ASHR * T5.W, PV.Z, literal.y,
6048; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6049; CM-NEXT:     LSHR T6.X, PV.Z, literal.x,
6050; CM-NEXT:     ASHR T5.Z, T0.Z, literal.y,
6051; CM-NEXT:     ASHR * T7.W, T0.Y, literal.z,
6052; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6053; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6054; CM-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
6055; CM-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
6056; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6057; CM-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
6058; CM-NEXT:     ASHR * T5.Y, PV.X, literal.y,
6059; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6060; CM-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
6061; CM-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6062; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test: one vector load, the sext, and one vector store.
6063  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
6064  %ext = sext <4 x i16> %load to <4 x i64>
6065  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
6066  ret void
6067}
6068
; Test: zero-extending global load, <8 x i16> -> <8 x i64>.
; Loads an <8 x i16> vector from %in (addrspace(1)), zero-extends every lane to
; i64 and stores the <8 x i64> result to %out (addrspace(1)).
; The check lines in the body are autogenerated (see the note on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. There is one group of checks per llc invocation listed
; in the file header.
; In the expected GCN output the source is fetched with a single dwordx4 load;
; low 16-bit halves are masked with 0xffff, high halves are shifted right by
; 16, the upper dword of each i64 lane is a register holding 0, and the result
; is written with four dwordx4 stores (byte offsets 0, 16, 32 and 48).
; NOTE(review): attribute group #0 is defined outside this chunk.
6069define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
6070; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64:
6071; GCN-NOHSA-SI:       ; %bb.0:
6072; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
6073; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
6074; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
6075; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
6076; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
6077; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6078; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
6079; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
6080; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6081; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
6082; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, 0
6083; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, 0
6084; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v9
6085; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v9
6086; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v9
6087; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v9
6088; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v9
6089; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
6090; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
6091; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6092; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
6093; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
6094; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6095; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6096; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v0
6097; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
6098; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v1
6099; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v3
6100; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48
6101; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:16
6102; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32
6103; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
6104; GCN-NOHSA-SI-NEXT:    s_endpgm
6105;
6106; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64:
6107; GCN-HSA:       ; %bb.0:
6108; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6109; GCN-HSA-NEXT:    v_mov_b32_e32 v12, 0
6110; GCN-HSA-NEXT:    v_mov_b32_e32 v14, v12
6111; GCN-HSA-NEXT:    v_mov_b32_e32 v15, v12
6112; GCN-HSA-NEXT:    v_mov_b32_e32 v8, v12
6113; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6114; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6115; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6116; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6117; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6118; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6119; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6120; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6121; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6122; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
6123; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6124; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
6125; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
6126; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
6127; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
6128; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6129; GCN-HSA-NEXT:    v_mov_b32_e32 v6, 0
6130; GCN-HSA-NEXT:    v_mov_b32_e32 v10, 0
6131; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
6132; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v3
6133; GCN-HSA-NEXT:    v_and_b32_e32 v11, 0xffff, v3
6134; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[11:14]
6135; GCN-HSA-NEXT:    v_mov_b32_e32 v4, v12
6136; GCN-HSA-NEXT:    v_mov_b32_e32 v13, v12
6137; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
6138; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
6139; GCN-HSA-NEXT:    v_and_b32_e32 v3, 0xffff, v0
6140; GCN-HSA-NEXT:    v_and_b32_e32 v12, 0xffff, v1
6141; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
6142; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
6143; GCN-HSA-NEXT:    v_and_b32_e32 v7, 0xffff, v2
6144; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
6145; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
6146; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
6147; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[3:6]
6148; GCN-HSA-NEXT:    s_endpgm
6149;
6150; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64:
6151; GCN-NOHSA-VI:       ; %bb.0:
6152; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6153; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6154; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6155; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
6156; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
6157; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6158; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
6159; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
6160; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6161; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
6162; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, 0
6163; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6164; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6165; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, 0
6166; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, 0
6167; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
6168; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v17
6169; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v17
6170; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, v17
6171; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
6172; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v3
6173; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
6174; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v0
6175; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6176; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v1
6177; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
6178; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
6179; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6180; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
6181; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
6182; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6183; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6184; GCN-NOHSA-VI-NEXT:    s_endpgm
6185;
6186; EG-LABEL: global_zextload_v8i16_to_v8i64:
6187; EG:       ; %bb.0:
6188; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6189; EG-NEXT:    TEX 0 @8
6190; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
6191; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
6192; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
6193; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
6194; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
6195; EG-NEXT:    CF_END
6196; EG-NEXT:    Fetch clause starting at 8:
6197; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6198; EG-NEXT:    ALU clause starting at 10:
6199; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6200; EG-NEXT:    ALU clause starting at 11:
6201; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
6202; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6203; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
6204; EG-NEXT:     MOV T8.Y, 0.0,
6205; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
6206; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
6207; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6208; EG-NEXT:     MOV T9.Y, 0.0,
6209; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
6210; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6211; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
6212; EG-NEXT:     MOV T10.Y, 0.0,
6213; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
6214; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
6215; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6216; EG-NEXT:     MOV T7.Y, 0.0,
6217; EG-NEXT:     MOV T8.W, 0.0,
6218; EG-NEXT:     MOV * T9.W, 0.0,
6219; EG-NEXT:     MOV T10.W, 0.0,
6220; EG-NEXT:     MOV * T7.W, 0.0,
6221; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
6222; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6223; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6224; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6225; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6226; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6227; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
6228; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6229; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6230; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
6231; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6232;
6233; CM-LABEL: global_zextload_v8i16_to_v8i64:
6234; CM:       ; %bb.0:
6235; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6236; CM-NEXT:    TEX 0 @8
6237; CM-NEXT:    ALU 32, @11, KC0[CB0:0-32], KC1[]
6238; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T14.X
6239; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T13.X
6240; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T10, T12.X
6241; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T11.X
6242; CM-NEXT:    CF_END
6243; CM-NEXT:    Fetch clause starting at 8:
6244; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6245; CM-NEXT:    ALU clause starting at 10:
6246; CM-NEXT:     MOV * T7.X, KC0[2].Z,
6247; CM-NEXT:    ALU clause starting at 11:
6248; CM-NEXT:     LSHR * T8.Z, T7.X, literal.x,
6249; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6250; CM-NEXT:     AND_INT T8.X, T7.X, literal.x,
6251; CM-NEXT:     MOV T8.Y, 0.0,
6252; CM-NEXT:     LSHR * T9.Z, T7.Y, literal.y,
6253; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6254; CM-NEXT:     AND_INT T9.X, T7.Y, literal.x,
6255; CM-NEXT:     MOV T9.Y, 0.0,
6256; CM-NEXT:     LSHR * T10.Z, T7.Z, literal.y,
6257; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6258; CM-NEXT:     AND_INT T10.X, T7.Z, literal.x,
6259; CM-NEXT:     MOV T10.Y, 0.0,
6260; CM-NEXT:     LSHR * T7.Z, T7.W, literal.y,
6261; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6262; CM-NEXT:     AND_INT T7.X, T7.W, literal.x,
6263; CM-NEXT:     MOV T7.Y, 0.0,
6264; CM-NEXT:     MOV * T8.W, 0.0,
6265; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
6266; CM-NEXT:     MOV * T9.W, 0.0,
6267; CM-NEXT:     MOV * T10.W, 0.0,
6268; CM-NEXT:     MOV * T7.W, 0.0,
6269; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6270; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6271; CM-NEXT:     LSHR T11.X, PV.W, literal.x,
6272; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6273; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6274; CM-NEXT:     LSHR T12.X, PV.W, literal.x,
6275; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6276; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6277; CM-NEXT:     LSHR * T13.X, PV.W, literal.x,
6278; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6279; CM-NEXT:     LSHR * T14.X, KC0[2].Y, literal.x,
6280; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, the zext, and one vector store.
6281  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
6282  %ext = zext <8 x i16> %load to <8 x i64>
6283  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
6284  ret void
6285}
6286
6287define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
6288; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64:
6289; GCN-NOHSA-SI:       ; %bb.0:
6290; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
6291; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6292; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6293; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
6294; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
6295; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6296; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
6297; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
6298; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6299; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
6300; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
6301; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6302; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v3
6303; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
6304; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6305; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v0, 0, 16
6306; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[0:1], 48
6307; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v5, 0, 16
6308; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[14:15], v[2:3], 48
6309; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v1, 0, 16
6310; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v2, 0, 16
6311; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6312; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6313; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v6, 0, 16
6314; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v7, 0, 16
6315; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6316; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
6317; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6318; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6319; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
6320; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6321; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6322; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6323; GCN-NOHSA-SI-NEXT:    s_endpgm
6324;
6325; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64:
6326; GCN-HSA:       ; %bb.0:
6327; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6328; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6329; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6330; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6331; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6332; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6333; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6334; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
6335; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
6336; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6337; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6338; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
6339; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
6340; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
6341; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
6342; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
6343; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6344; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
6345; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
6346; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
6347; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[0:1], 48
6348; GCN-HSA-NEXT:    v_bfe_i32 v4, v1, 0, 16
6349; GCN-HSA-NEXT:    v_mov_b32_e32 v11, v3
6350; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6351; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6352; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
6353; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
6354; GCN-HSA-NEXT:    v_bfe_i32 v4, v0, 0, 16
6355; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
6356; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[2:3], 48
6357; GCN-HSA-NEXT:    v_bfe_i32 v0, v11, 0, 16
6358; GCN-HSA-NEXT:    v_bfe_i32 v6, v1, 0, 16
6359; GCN-HSA-NEXT:    v_bfe_i32 v10, v10, 0, 16
6360; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6361; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6362; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6363; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6364; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
6365; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[0:3]
6366; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6367; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
6368; GCN-HSA-NEXT:    s_endpgm
6369;
6370; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64:
6371; GCN-NOHSA-VI:       ; %bb.0:
6372; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6373; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6374; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6375; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
6376; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
6377; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6378; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
6379; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
6380; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6381; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6382; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6383; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
6384; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, v3
6385; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
6386; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
6387; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
6388; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6389; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v11, 0, 16
6390; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v3, 0, 16
6391; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v0, 0, 16
6392; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
6393; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v5, 0, 16
6394; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v2, 0, 16
6395; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v7, 0, 16
6396; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v10, 0, 16
6397; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
6398; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
6399; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6400; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6401; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6402; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6403; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6404; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
6405; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
6406; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
6407; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6408; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6409; GCN-NOHSA-VI-NEXT:    s_endpgm
6410;
6411; EG-LABEL: global_sextload_v8i16_to_v8i64:
6412; EG:       ; %bb.0:
6413; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6414; EG-NEXT:    TEX 0 @8
6415; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
6416; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
6417; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
6418; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
6419; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
6420; EG-NEXT:    CF_END
6421; EG-NEXT:    Fetch clause starting at 8:
6422; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6423; EG-NEXT:    ALU clause starting at 10:
6424; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6425; EG-NEXT:    ALU clause starting at 11:
6426; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
6427; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6428; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6429; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
6430; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
6431; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
6432; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6433; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6434; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
6435; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
6436; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
6437; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6438; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6439; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
6440; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
6441; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
6442; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6443; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
6444; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
6445; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
6446; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
6447; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6448; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
6449; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
6450; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
6451; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6452; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
6453; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
6454; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6455; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6456; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6457; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
6458; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
6459; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6460;
6461; CM-LABEL: global_sextload_v8i16_to_v8i64:
6462; CM:       ; %bb.0:
6463; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6464; CM-NEXT:    TEX 0 @8
6465; CM-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
6466; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T14.X
6467; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T11.X
6468; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T9.X
6469; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T10, T8.X
6470; CM-NEXT:    CF_END
6471; CM-NEXT:    Fetch clause starting at 8:
6472; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6473; CM-NEXT:    ALU clause starting at 10:
6474; CM-NEXT:     MOV * T7.X, KC0[2].Z,
6475; CM-NEXT:    ALU clause starting at 11:
6476; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6477; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6478; CM-NEXT:     LSHR T8.X, PV.W, literal.x,
6479; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6480; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6481; CM-NEXT:     LSHR T9.X, PV.W, literal.x,
6482; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
6483; CM-NEXT:     ASHR * T10.W, T7.W, literal.z,
6484; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6485; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6486; CM-NEXT:     LSHR T11.X, PV.Z, literal.x,
6487; CM-NEXT:     ASHR T10.Z, T7.W, literal.y,
6488; CM-NEXT:     ASHR * T12.W, T7.Z, literal.z,
6489; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6490; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6491; CM-NEXT:     BFE_INT T10.X, T7.W, 0.0, literal.x,
6492; CM-NEXT:     ASHR T12.Z, T7.Z, literal.x,
6493; CM-NEXT:     ASHR * T13.W, T7.Y, literal.y,
6494; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6495; CM-NEXT:     BFE_INT T12.X, T7.Z, 0.0, literal.x,
6496; CM-NEXT:     ASHR T10.Y, PV.X, literal.y,
6497; CM-NEXT:     ASHR T13.Z, T7.Y, literal.x,
6498; CM-NEXT:     ASHR * T7.W, T7.X, literal.y,
6499; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6500; CM-NEXT:     BFE_INT T13.X, T7.Y, 0.0, literal.x,
6501; CM-NEXT:     ASHR T12.Y, PV.X, literal.y,
6502; CM-NEXT:     ASHR * T7.Z, T7.X, literal.x,
6503; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6504; CM-NEXT:     BFE_INT T7.X, T7.X, 0.0, literal.x,
6505; CM-NEXT:     ASHR * T13.Y, PV.X, literal.y,
6506; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6507; CM-NEXT:     LSHR T14.X, KC0[2].Y, literal.x,
6508; CM-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6509; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6510  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
6511  %ext = sext <8 x i16> %load to <8 x i64>
6512  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
6513  ret void
6514}
6515
; Zero-extending global load test, <16 x i16> widened to <16 x i64>.
; The kernel reads a <16 x i16> vector from %in (addrspace 1), zero-extends
; every lane to i64, and stores the resulting <16 x i64> vector to %out
; (addrspace 1).
; The expectations that follow are autogenerated (see the note on the first
; line of this file), so regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
; For every checked target the expected code issues two 128-bit loads and
; eight vector stores, with the high dword of each i64 lane written as zero.
6516define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
6517; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64:
6518; GCN-NOHSA-SI:       ; %bb.0:
6519; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
6520; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6521; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6522; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
6523; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
6524; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6525; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
6526; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
6527; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6528; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6529; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
6530; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
6531; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6532; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
6533; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v0
6534; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
6535; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v1
6536; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
6537; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v3
6538; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, 0
6539; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6540; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v5
6541; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v6
6542; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v23, 0xffff, v6
6543; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
6544; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
6545; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
6546; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, 0xffff, v7
6547; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, 0xffff, v5
6548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, v20
6549; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v20
6550; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v29, v20
6551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v20
6552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v20
6553; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v20
6554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v20
6555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v20
6556; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, v20
6557; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v20
6558; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v20
6559; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
6560; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
6561; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:80
6562; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:112
6563; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(1)
6564; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, 0
6565; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, 0
6566; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6567; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, 0
6568; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
6569; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6570; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6571; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
6572; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:96
6573; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
6574; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0
6575; GCN-NOHSA-SI-NEXT:    s_endpgm
6576;
6577; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64:
6578; GCN-HSA:       ; %bb.0:
6579; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6580; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
6581; GCN-HSA-NEXT:    v_mov_b32_e32 v10, v8
6582; GCN-HSA-NEXT:    v_mov_b32_e32 v12, v8
6583; GCN-HSA-NEXT:    v_mov_b32_e32 v15, 0
6584; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6585; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6586; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6587; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
6588; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6589; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
6590; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6591; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6592; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
6593; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6594; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6595; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
6596; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6597; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
6598; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
6599; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x50
6600; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6601; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
6602; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
6603; GCN-HSA-NEXT:    v_and_b32_e32 v9, 0xffff, v1
6604; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
6605; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
6606; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
6607; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x70
6608; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
6609; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
6610; GCN-HSA-NEXT:    v_and_b32_e32 v9, 0xffff, v5
6611; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6612; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
6613; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
6614; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
6615; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
6616; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v7
6617; GCN-HSA-NEXT:    v_and_b32_e32 v9, 0xffff, v7
6618; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
6619; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
6620; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
6621; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
6622; GCN-HSA-NEXT:    v_and_b32_e32 v9, 0xffff, v2
6623; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s2
6624; GCN-HSA-NEXT:    v_mov_b32_e32 v12, 0
6625; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
6626; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
6627; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
6628; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
6629; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v4
6630; GCN-HSA-NEXT:    v_and_b32_e32 v12, 0xffff, v4
6631; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6632; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6633; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6634; GCN-HSA-NEXT:    s_add_u32 s0, s0, 0x60
6635; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
6636; GCN-HSA-NEXT:    v_and_b32_e32 v7, 0xffff, v3
6637; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6638; GCN-HSA-NEXT:    flat_store_dwordx4 v[1:2], v[7:10]
6639; GCN-HSA-NEXT:    v_mov_b32_e32 v13, v8
6640; GCN-HSA-NEXT:    v_mov_b32_e32 v9, v8
6641; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v8
6642; GCN-HSA-NEXT:    v_mov_b32_e32 v3, 0
6643; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
6644; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v6
6645; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
6646; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
6647; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v0
6648; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
6649; GCN-HSA-NEXT:    v_mov_b32_e32 v11, 0
6650; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
6651; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
6652; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6653; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[12:15]
6654; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6655; GCN-HSA-NEXT:    s_endpgm
6656;
6657; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64:
6658; GCN-NOHSA-VI:       ; %bb.0:
6659; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6660; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6661; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6662; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
6663; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
6664; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6665; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
6666; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
6667; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6668; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6669; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6670; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6671; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v30, 0
6672; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, 0
6673; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
6674; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
6675; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v23, 0
6676; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
6677; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v3
6678; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
6679; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v27, 0xffff, v4
6680; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v4
6681; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, 0
6682; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v28, v4
6683; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
6684; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, 0xffff, v6
6685; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v22, 16, v6
6686; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, 0xffff, v7
6687; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v26, 16, v7
6688; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v3, 0xffff, v5
6689; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
6690; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, 0
6691; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v25, v4
6692; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:64
6693; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v0
6694; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v27, 0
6695; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
6696; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, 0xffff, v1
6697; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
6698; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v2
6699; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
6700; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v21, v4
6701; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, v4
6702; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v4
6703; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v4
6704; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v4
6705; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:80
6706; GCN-NOHSA-VI-NEXT:    s_nop 0
6707; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, 0
6708; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112
6709; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96
6710; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
6711; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6712; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
6713; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
6714; GCN-NOHSA-VI-NEXT:    s_endpgm
6715;
6716; EG-LABEL: global_zextload_v16i16_to_v16i64:
6717; EG:       ; %bb.0:
6718; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
6719; EG-NEXT:    TEX 1 @12
6720; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
6721; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
6722; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
6723; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
6724; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
6725; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
6726; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
6727; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
6728; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
6729; EG-NEXT:    CF_END
6730; EG-NEXT:    Fetch clause starting at 12:
6731; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
6732; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
6733; EG-NEXT:    ALU clause starting at 16:
6734; EG-NEXT:     MOV * T11.X, KC0[2].Z,
6735; EG-NEXT:    ALU clause starting at 17:
6736; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
6737; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6738; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
6739; EG-NEXT:     MOV T13.Y, 0.0,
6740; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
6741; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
6742; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6743; EG-NEXT:     MOV T14.Y, 0.0,
6744; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
6745; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6746; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
6747; EG-NEXT:     MOV T15.Y, 0.0,
6748; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
6749; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
6750; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6751; EG-NEXT:     MOV T12.Y, 0.0,
6752; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
6753; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6754; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
6755; EG-NEXT:     MOV T16.Y, 0.0,
6756; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
6757; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
6758; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6759; EG-NEXT:     MOV T17.Y, 0.0,
6760; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
6761; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6762; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
6763; EG-NEXT:     MOV T18.Y, 0.0,
6764; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
6765; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
6766; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6767; EG-NEXT:     MOV T11.Y, 0.0,
6768; EG-NEXT:     MOV T13.W, 0.0,
6769; EG-NEXT:     MOV * T14.W, 0.0,
6770; EG-NEXT:     MOV T15.W, 0.0,
6771; EG-NEXT:     MOV * T12.W, 0.0,
6772; EG-NEXT:     MOV T16.W, 0.0,
6773; EG-NEXT:     MOV * T17.W, 0.0,
6774; EG-NEXT:     MOV T18.W, 0.0,
6775; EG-NEXT:     MOV * T11.W, 0.0,
6776; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
6777; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6778; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6779; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6780; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6781; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6782; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
6783; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6784; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6785; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
6786; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6787; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6788; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
6789; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6790; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6791; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
6792; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6793; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6794; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
6795; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6796; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6797; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
6798; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6799;
6800; CM-LABEL: global_zextload_v16i16_to_v16i64:
6801; CM:       ; %bb.0:
6802; CM-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
6803; CM-NEXT:    TEX 1 @12
6804; CM-NEXT:    ALU 64, @17, KC0[CB0:0-32], KC1[]
6805; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T26.X
6806; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T14, T25.X
6807; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T24.X
6808; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T23.X
6809; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T16, T22.X
6810; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T21.X
6811; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T18, T20.X
6812; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T19.X
6813; CM-NEXT:    CF_END
6814; CM-NEXT:    Fetch clause starting at 12:
6815; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
6816; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
6817; CM-NEXT:    ALU clause starting at 16:
6818; CM-NEXT:     MOV * T11.X, KC0[2].Z,
6819; CM-NEXT:    ALU clause starting at 17:
6820; CM-NEXT:     LSHR * T13.Z, T12.X, literal.x,
6821; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6822; CM-NEXT:     AND_INT T13.X, T12.X, literal.x,
6823; CM-NEXT:     MOV T13.Y, 0.0,
6824; CM-NEXT:     LSHR * T14.Z, T12.Y, literal.y,
6825; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6826; CM-NEXT:     AND_INT T14.X, T12.Y, literal.x,
6827; CM-NEXT:     MOV T14.Y, 0.0,
6828; CM-NEXT:     LSHR * T15.Z, T12.Z, literal.y,
6829; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6830; CM-NEXT:     AND_INT T15.X, T12.Z, literal.x,
6831; CM-NEXT:     MOV T15.Y, 0.0,
6832; CM-NEXT:     LSHR * T12.Z, T12.W, literal.y,
6833; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6834; CM-NEXT:     AND_INT T12.X, T12.W, literal.x,
6835; CM-NEXT:     MOV T12.Y, 0.0,
6836; CM-NEXT:     LSHR * T16.Z, T11.X, literal.y,
6837; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6838; CM-NEXT:     AND_INT T16.X, T11.X, literal.x,
6839; CM-NEXT:     MOV T16.Y, 0.0,
6840; CM-NEXT:     LSHR * T17.Z, T11.Y, literal.y,
6841; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6842; CM-NEXT:     AND_INT T17.X, T11.Y, literal.x,
6843; CM-NEXT:     MOV T17.Y, 0.0,
6844; CM-NEXT:     LSHR * T18.Z, T11.Z, literal.y,
6845; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6846; CM-NEXT:     AND_INT T18.X, T11.Z, literal.x,
6847; CM-NEXT:     MOV T18.Y, 0.0,
6848; CM-NEXT:     LSHR * T11.Z, T11.W, literal.y,
6849; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6850; CM-NEXT:     AND_INT T11.X, T11.W, literal.x,
6851; CM-NEXT:     MOV T11.Y, 0.0,
6852; CM-NEXT:     MOV * T13.W, 0.0,
6853; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
6854; CM-NEXT:     MOV * T14.W, 0.0,
6855; CM-NEXT:     MOV * T15.W, 0.0,
6856; CM-NEXT:     MOV * T12.W, 0.0,
6857; CM-NEXT:     MOV * T16.W, 0.0,
6858; CM-NEXT:     MOV * T17.W, 0.0,
6859; CM-NEXT:     MOV * T18.W, 0.0,
6860; CM-NEXT:     MOV * T11.W, 0.0,
6861; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6862; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
6863; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
6864; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6865; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6866; CM-NEXT:     LSHR T20.X, PV.W, literal.x,
6867; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6868; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6869; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
6870; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6871; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6872; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
6873; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6874; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6875; CM-NEXT:     LSHR T23.X, PV.W, literal.x,
6876; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6877; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6878; CM-NEXT:     LSHR T24.X, PV.W, literal.x,
6879; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6880; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6881; CM-NEXT:     LSHR * T25.X, PV.W, literal.x,
6882; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6883; CM-NEXT:     LSHR * T26.X, KC0[2].Y, literal.x,
6884; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test follows - load the i16 vector, zext it to i64 lanes, store it.
6885  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
6886  %ext = zext <16 x i16> %load to <16 x i64>
6887  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
6888  ret void
6889}
6890
6891define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
6892; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64:
6893; GCN-NOHSA-SI:       ; %bb.0:
6894; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
6895; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6896; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6897; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
6898; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
6899; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6900; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
6901; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
6902; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6903; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6904; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
6905; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
6906; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6907; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, v7
6908; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, v3
6909; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v4
6910; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6911; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v0
6912; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v8, 0, 16
6913; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[6:7], 48
6914; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6915; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
6916; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6917; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[9:10], v[4:5], 48
6918; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v5, 0, 16
6919; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
6920; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80
6921; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v5, v0, 0, 16
6922; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6923; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v15, 0, 16
6924; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v12, 0, 16
6925; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[2:3], 48
6926; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6927; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
6928; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6929; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[11:12], v[0:1], 48
6930; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v9, v1, 0, 16
6931; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v2, 0, 16
6932; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v14, 0, 16
6933; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v13, 0, 16
6934; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v4, 0, 16
6935; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
6936; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v6, 0, 16
6937; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v1, 0, 16
6938; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
6939; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6940; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
6941; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
6942; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
6943; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
6944; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6945; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
6946; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
6947; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:16
6948; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:96
6949; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:64
6950; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6951; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0
6952; GCN-NOHSA-SI-NEXT:    s_endpgm
6953;
6954; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64:
6955; GCN-HSA:       ; %bb.0:
6956; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6957; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6958; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6959; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6960; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6961; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
6962; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
6963; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6964; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6965; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
6966; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6967; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6968; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
6969; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
6970; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6971; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6972; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
6973; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
6974; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
6975; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6976; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
6977; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
6978; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
6979; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6980; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
6981; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
6982; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
6983; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[0:1], 48
6984; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
6985; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6986; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v3
6987; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6988; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
6989; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
6990; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[2:3], 48
6991; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
6992; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6993; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6994; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6995; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
6996; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
6997; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
6998; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
6999; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
7000; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
7001; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
7002; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
7003; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
7004; GCN-HSA-NEXT:    v_bfe_i32 v10, v1, 0, 16
7005; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7006; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
7007; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7008; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
7009; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
7010; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
7011; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
7012; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
7013; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[4:5], 48
7014; GCN-HSA-NEXT:    v_bfe_i32 v0, v5, 0, 16
7015; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
7016; GCN-HSA-NEXT:    v_mov_b32_e32 v11, v7
7017; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
7018; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7019; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
7020; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
7021; GCN-HSA-NEXT:    v_bfe_i32 v10, v8, 0, 16
7022; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
7023; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
7024; GCN-HSA-NEXT:    v_bfe_i32 v8, v6, 0, 16
7025; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[6:7], 48
7026; GCN-HSA-NEXT:    v_bfe_i32 v4, v11, 0, 16
7027; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
7028; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s1
7029; GCN-HSA-NEXT:    v_bfe_i32 v2, v1, 0, 16
7030; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
7031; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
7032; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s0
7033; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7034; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
7035; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
7036; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
7037; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[4:7]
7038; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
7039; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
7040; GCN-HSA-NEXT:    s_endpgm
7041;
7042; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64:
7043; GCN-NOHSA-VI:       ; %bb.0:
7044; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
7045; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
7046; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
7047; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
7048; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
7049; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7050; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
7051; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
7052; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
7053; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
7054; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
7055; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
7056; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
7057; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v0, 0, 16
7058; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
7059; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
7060; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v9, 0, 16
7061; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v5, 0, 16
7062; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7063; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7064; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
7065; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:80
7066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v7
7067; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v5, 0, 16
7068; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v4, 0, 16
7069; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7070; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7071; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v7
7072; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
7073; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
7074; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v13, 0, 16
7075; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v4, 0, 16
7076; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7077; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7078; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:112
7079; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
7080; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v1, 0, 16
7081; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7082; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v1, 0, 16
7083; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v3
7084; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v1, 0, 16
7085; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
7086; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v0, 0, 16
7087; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
7088; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v2, 0, 16
7089; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v4, 0, 16
7090; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v6, 0, 16
7091; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v1, 0, 16
7092; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v3, 0, 16
7093; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
7094; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
7095; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
7096; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
7097; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7098; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
7099; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
7100; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
7101; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
7102; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
7103; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
7104; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
7105; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7106; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
7107; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
7108; GCN-NOHSA-VI-NEXT:    s_endpgm
7109;
7110; EG-LABEL: global_sextload_v16i16_to_v16i64:
7111; EG:       ; %bb.0:
7112; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
7113; EG-NEXT:    TEX 1 @12
7114; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
7115; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
7116; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
7117; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
7118; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
7119; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
7120; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
7121; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
7122; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
7123; EG-NEXT:    CF_END
7124; EG-NEXT:    Fetch clause starting at 12:
7125; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
7126; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
7127; EG-NEXT:    ALU clause starting at 16:
7128; EG-NEXT:     MOV * T11.X, KC0[2].Z,
7129; EG-NEXT:    ALU clause starting at 17:
7130; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
7131; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7132; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7133; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
7134; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7135; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7136; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
7137; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7138; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7139; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
7140; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7141; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7142; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
7143; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7144; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7145; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
7146; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7147; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
7148; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7149; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7150; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
7151; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
7152; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
7153; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7154; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7155; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
7156; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
7157; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
7158; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7159; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
7160; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7161; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
7162; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
7163; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7164; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
7165; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
7166; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
7167; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
7168; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7169; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
7170; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
7171; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
7172; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
7173; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7174; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
7175; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
7176; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
7177; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
7178; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7179; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
7180; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
7181; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
7182; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
7183; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7184; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
7185; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
7186; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
7187; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7188; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
7189; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
7190; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7191; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7192; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7193; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
7194; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
7195; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7196;
7197; CM-LABEL: global_sextload_v16i16_to_v16i64:
7198; CM:       ; %bb.0:
7199; CM-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
7200; CM-NEXT:    TEX 1 @12
7201; CM-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
7202; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T26.X
7203; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T20.X
7204; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T18.X
7205; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T17.X
7206; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T16.X
7207; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T22, T15.X
7208; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T21, T14.X
7209; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T13.X
7210; CM-NEXT:    CF_END
7211; CM-NEXT:    Fetch clause starting at 12:
7212; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
7213; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
7214; CM-NEXT:    ALU clause starting at 16:
7215; CM-NEXT:     MOV * T11.X, KC0[2].Z,
7216; CM-NEXT:    ALU clause starting at 17:
7217; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7218; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7219; CM-NEXT:     LSHR T13.X, PV.W, literal.x,
7220; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7221; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7222; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
7223; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7224; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7225; CM-NEXT:     LSHR T15.X, PV.W, literal.x,
7226; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7227; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7228; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
7229; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7230; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7231; CM-NEXT:     LSHR T17.X, PV.W, literal.x,
7232; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7233; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7234; CM-NEXT:     LSHR T18.X, PV.W, literal.x,
7235; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
7236; CM-NEXT:     ASHR * T19.W, T11.W, literal.z,
7237; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7238; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7239; CM-NEXT:     LSHR T20.X, PV.Z, literal.x,
7240; CM-NEXT:     ASHR T19.Z, T11.W, literal.y,
7241; CM-NEXT:     ASHR * T21.W, T11.Z, literal.z,
7242; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7243; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7244; CM-NEXT:     BFE_INT T19.X, T11.W, 0.0, literal.x,
7245; CM-NEXT:     ASHR T21.Z, T11.Z, literal.x,
7246; CM-NEXT:     ASHR * T22.W, T11.Y, literal.y,
7247; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7248; CM-NEXT:     BFE_INT T21.X, T11.Z, 0.0, literal.x,
7249; CM-NEXT:     ASHR T19.Y, PV.X, literal.y,
7250; CM-NEXT:     ASHR T22.Z, T11.Y, literal.x,
7251; CM-NEXT:     ASHR * T11.W, T11.X, literal.y,
7252; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7253; CM-NEXT:     BFE_INT T22.X, T11.Y, 0.0, literal.x,
7254; CM-NEXT:     ASHR T21.Y, PV.X, literal.y,
7255; CM-NEXT:     ASHR T11.Z, T11.X, literal.x,
7256; CM-NEXT:     ASHR * T23.W, T12.W, literal.y,
7257; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7258; CM-NEXT:     BFE_INT T11.X, T11.X, 0.0, literal.x,
7259; CM-NEXT:     ASHR T22.Y, PV.X, literal.y,
7260; CM-NEXT:     ASHR T23.Z, T12.W, literal.x,
7261; CM-NEXT:     ASHR * T24.W, T12.Z, literal.y,
7262; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7263; CM-NEXT:     BFE_INT T23.X, T12.W, 0.0, literal.x,
7264; CM-NEXT:     ASHR T11.Y, PV.X, literal.y,
7265; CM-NEXT:     ASHR T24.Z, T12.Z, literal.x,
7266; CM-NEXT:     ASHR * T25.W, T12.Y, literal.y,
7267; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7268; CM-NEXT:     BFE_INT T24.X, T12.Z, 0.0, literal.x,
7269; CM-NEXT:     ASHR T23.Y, PV.X, literal.y,
7270; CM-NEXT:     ASHR T25.Z, T12.Y, literal.x,
7271; CM-NEXT:     ASHR * T12.W, T12.X, literal.y,
7272; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7273; CM-NEXT:     BFE_INT T25.X, T12.Y, 0.0, literal.x,
7274; CM-NEXT:     ASHR T24.Y, PV.X, literal.y,
7275; CM-NEXT:     ASHR * T12.Z, T12.X, literal.x,
7276; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7277; CM-NEXT:     BFE_INT T12.X, T12.X, 0.0, literal.x,
7278; CM-NEXT:     ASHR * T25.Y, PV.X, literal.y,
7279; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7280; CM-NEXT:     LSHR T26.X, KC0[2].Y, literal.x,
7281; CM-NEXT:     ASHR * T12.Y, PV.X, literal.y,
7282; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7283  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
7284  %ext = sext <16 x i16> %load to <16 x i64>
7285  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
7286  ret void
7287}
7288
7289define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
7290; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64:
7291; GCN-NOHSA-SI:       ; %bb.0:
7292; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
7293; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
7294; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
7295; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
7296; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
7297; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
7298; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
7299; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
7300; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
7301; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
7302; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
7303; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
7304; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
7305; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
7306; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
7307; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[2:5], off, s[8:11], 0
7308; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[6:9], off, s[8:11], 0 offset:16
7309; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[10:13], off, s[8:11], 0 offset:32
7310; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[14:17], off, s[8:11], 0 offset:48
7311; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
7312; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v3
7313; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v4
7314; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v2
7315; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v2
7316; GCN-NOHSA-SI-NEXT:    buffer_store_dword v18, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
7317; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7318; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
7319; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
7320; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
7321; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(2)
7322; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, 0xffff, v4
7323; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7324; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
7325; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7326; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
7327; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
7328; GCN-NOHSA-SI-NEXT:    buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
7329; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v3
7330; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v5
7331; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, 0xffff, v5
7332; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v6
7333; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, 0xffff, v6
7334; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
7335; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, 0xffff, v8
7336; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v40, 16, v7
7337; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v38, 0xffff, v7
7338; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v9
7339; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v42, 0xffff, v9
7340; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v10
7341; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v46, 0xffff, v10
7342; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v12
7343; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7344; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, 0xffff, v12
7345; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v11
7346; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v50, 0xffff, v11
7347; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v56, 16, v13
7348; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v54, 0xffff, v13
7349; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v17
7350; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v60, 16, v14
7351; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v58, 0xffff, v14
7352; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v16
7353; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v16
7354; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v15
7355; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v15
7356; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v17
7357; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
7358; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v1
7359; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v1
7360; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v55, v1
7361; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v57, v1
7362; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v51, v1
7363; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v53, v1
7364; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v43, v1
7365; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v45, v1
7366; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v39, v1
7367; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v41, v1
7368; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v31, v1
7369; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v33, v1
7370; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, v23
7371; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
7372; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v1
7373; GCN-NOHSA-SI-NEXT:    buffer_store_dword v4, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill
7374; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7375; GCN-NOHSA-SI-NEXT:    buffer_store_dword v5, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill
7376; GCN-NOHSA-SI-NEXT:    buffer_store_dword v6, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill
7377; GCN-NOHSA-SI-NEXT:    buffer_store_dword v7, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill
7378; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v1
7379; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v59, v1
7380; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v1
7381; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v47, v1
7382; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v1
7383; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v35, v1
7384; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7385; GCN-NOHSA-SI-NEXT:    buffer_load_dword v4, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
7386; GCN-NOHSA-SI-NEXT:    buffer_load_dword v5, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
7387; GCN-NOHSA-SI-NEXT:    buffer_load_dword v6, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
7388; GCN-NOHSA-SI-NEXT:    buffer_load_dword v7, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload
7389; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7390; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
7391; GCN-NOHSA-SI-NEXT:    buffer_load_dword v12, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7392; GCN-NOHSA-SI-NEXT:    buffer_load_dword v13, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
7393; GCN-NOHSA-SI-NEXT:    buffer_load_dword v14, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
7394; GCN-NOHSA-SI-NEXT:    buffer_load_dword v15, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
7395; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7396; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v1
7397; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
7398; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
7399; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7400; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, 0
7401; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v61, 0
7402; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, 0
7403; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7404; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, v12
7405; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v13
7406; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, v14
7407; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, 0
7408; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
7409; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v37, 0
7410; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v29, 0
7411; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v49, 0
7412; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
7413; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176
7414; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144
7415; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112
7416; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:80
7417; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:48
7418; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
7419; GCN-NOHSA-SI-NEXT:    buffer_load_dword v8, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload
7420; GCN-NOHSA-SI-NEXT:    buffer_load_dword v9, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload
7421; GCN-NOHSA-SI-NEXT:    buffer_load_dword v10, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload
7422; GCN-NOHSA-SI-NEXT:    buffer_load_dword v11, off, s[12:15], 0 offset:48 ; 4-byte Folded Reload
7423; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7424; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
7425; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224
7426; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192
7427; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:160
7428; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128
7429; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:96
7430; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:64
7431; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
7432; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7433; GCN-NOHSA-SI-NEXT:    s_endpgm
7434;
7435; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64:
7436; GCN-HSA:       ; %bb.0:
7437; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
7438; GCN-HSA-NEXT:    v_mov_b32_e32 v4, 0
7439; GCN-HSA-NEXT:    v_mov_b32_e32 v6, v4
7440; GCN-HSA-NEXT:    v_mov_b32_e32 v8, v4
7441; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7442; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
7443; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
7444; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
7445; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
7446; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
7447; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
7448; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
7449; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s5
7450; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s4
7451; GCN-HSA-NEXT:    flat_load_dwordx4 v[9:12], v[9:10]
7452; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s3
7453; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s2
7454; GCN-HSA-NEXT:    s_add_u32 s2, s2, 48
7455; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
7456; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s3
7457; GCN-HSA-NEXT:    flat_load_dwordx4 v[13:16], v[13:14]
7458; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s2
7459; GCN-HSA-NEXT:    flat_load_dwordx4 v[17:20], v[17:18]
7460; GCN-HSA-NEXT:    s_add_u32 s4, s0, 48
7461; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
7462; GCN-HSA-NEXT:    s_add_u32 s6, s0, 16
7463; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7464; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0xf0
7465; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
7466; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0xd0
7467; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7468; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0xb0
7469; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
7470; GCN-HSA-NEXT:    s_add_u32 s14, s0, 0x90
7471; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
7472; GCN-HSA-NEXT:    s_add_u32 s16, s0, 0x70
7473; GCN-HSA-NEXT:    s_addc_u32 s17, s1, 0
7474; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s17
7475; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
7476; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s16
7477; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7478; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
7479; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
7480; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v3
7481; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[5:8]
7482; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
7483; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
7484; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v1
7485; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
7486; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[5:8]
7487; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s13
7488; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
7489; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v12
7490; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v12
7491; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s12
7492; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[5:8]
7493; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s15
7494; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v10
7495; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v10
7496; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s14
7497; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[5:8]
7498; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s7
7499; GCN-HSA-NEXT:    s_waitcnt vmcnt(5)
7500; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v14
7501; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v14
7502; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s6
7503; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[5:8]
7504; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
7505; GCN-HSA-NEXT:    s_waitcnt vmcnt(5)
7506; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v20
7507; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v20
7508; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s9
7509; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s8
7510; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[5:8]
7511; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s11
7512; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v18
7513; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v18
7514; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s10
7515; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7516; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[5:8]
7517; GCN-HSA-NEXT:    v_and_b32_e32 v3, 0xffff, v16
7518; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v15
7519; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v15
7520; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
7521; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
7522; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
7523; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[5:8]
7524; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
7525; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v13
7526; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v13
7527; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
7528; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
7529; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
7530; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7531; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[5:8]
7532; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
7533; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
7534; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
7535; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
7536; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v19
7537; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v19
7538; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7539; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[5:8]
7540; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
7541; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
7542; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
7543; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v17
7544; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v17
7545; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
7546; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7547; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[5:8]
7548; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
7549; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
7550; GCN-HSA-NEXT:    v_and_b32_e32 v5, 0xffff, v11
7551; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
7552; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
7553; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
7554; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[5:8]
7555; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
7556; GCN-HSA-NEXT:    v_and_b32_e32 v8, 0xffff, v0
7557; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
7558; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v16
7559; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
7560; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
7561; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
7562; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7563; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
7564; GCN-HSA-NEXT:    v_and_b32_e32 v17, 0xffff, v9
7565; GCN-HSA-NEXT:    v_mov_b32_e32 v20, 0
7566; GCN-HSA-NEXT:    v_mov_b32_e32 v18, v4
7567; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
7568; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
7569; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[17:20]
7570; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7571; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
7572; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
7573; GCN-HSA-NEXT:    v_and_b32_e32 v12, 0xffff, v2
7574; GCN-HSA-NEXT:    v_mov_b32_e32 v15, 0
7575; GCN-HSA-NEXT:    v_mov_b32_e32 v13, v4
7576; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
7577; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
7578; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[12:15]
7579; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
7580; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
7581; GCN-HSA-NEXT:    v_mov_b32_e32 v11, 0
7582; GCN-HSA-NEXT:    v_mov_b32_e32 v9, v4
7583; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
7584; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
7585; GCN-HSA-NEXT:    s_endpgm
7586;
7587; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64:
7588; GCN-NOHSA-VI:       ; %bb.0:
7589; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
7590; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
7591; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
7592; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
7593; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
7594; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7595; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
7596; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
7597; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
7598; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
7599; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[30:33], off, s[8:11], 0 offset:32
7600; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[34:37], off, s[8:11], 0 offset:48
7601; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v57, 0
7602; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
7603; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
7604; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v55, 0
7605; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v53, v57
7606; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v46, v57
7607; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v48, v57
7608; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v28, v57
7609; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v57
7610; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, v57
7611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
7612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
7613; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v50, v57
7614; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v57
7615; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, v57
7616; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v41, 0
7617; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v23, 0
7618; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v43, v57
7619; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v39, v57
7620; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v59, v57
7621; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v25, v57
7622; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v21, v57
7623; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
7624; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
7625; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
7626; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v40, 16, v30
7627; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v38, 0xffff, v30
7628; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v44, 16, v32
7629; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v42, 0xffff, v32
7630; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v32, 16, v31
7631; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v30, 0xffff, v31
7632; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v47, 16, v33
7633; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v45, 0xffff, v33
7634; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
7635; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v54, 16, v36
7636; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v52, 0xffff, v36
7637; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v31, v57
7638; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v33, v57
7639; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
7640; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
7641; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, 0xffff, v0
7642; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
7643; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, 0xffff, v1
7644; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
7645; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v3
7646; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v22, 16, v4
7647; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, 0xffff, v4
7648; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v26, 16, v6
7649; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, 0xffff, v6
7650; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
7651; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, 0xffff, v5
7652; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v7
7653; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v27, 0xffff, v7
7654; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v51, 16, v34
7655; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v49, 0xffff, v34
7656; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v36, 16, v35
7657; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v34, 0xffff, v35
7658; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v58, 16, v37
7659; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v56, 0xffff, v37
7660; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v35, v57
7661; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v37, v57
7662; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:144
7663; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, v57
7664; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v30, v57
7665; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, v57
7666; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v57
7667; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v57
7668; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:224
7669; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:208
7670; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v52, 0
7671; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:176
7672; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112
7673; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
7674; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7675; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
7676; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v45, 0
7677; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v27, 0
7678; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[49:52], off, s[0:3], 0 offset:192
7679; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:160
7680; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:128
7681; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:240
7682; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96
7683; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
7684; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
7685; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0
7686; GCN-NOHSA-VI-NEXT:    s_endpgm
7687;
7688; EG-LABEL: global_zextload_v32i16_to_v32i64:
7689; EG:       ; %bb.0:
7690; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7691; EG-NEXT:    TEX 2 @22
7692; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
7693; EG-NEXT:    TEX 0 @28
7694; EG-NEXT:    ALU 93, @65, KC0[CB0:0-32], KC1[]
7695; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
7696; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
7697; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
7698; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
7699; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
7700; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
7701; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
7702; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
7703; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
7704; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
7705; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
7706; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
7707; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
7708; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
7709; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
7710; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
7711; EG-NEXT:    CF_END
7712; EG-NEXT:    Fetch clause starting at 22:
7713; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
7714; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
7715; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
7716; EG-NEXT:    Fetch clause starting at 28:
7717; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
7718; EG-NEXT:    ALU clause starting at 30:
7719; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7720; EG-NEXT:    ALU clause starting at 31:
7721; EG-NEXT:     LSHR * T23.Z, T20.Z, literal.x,
7722; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7723; EG-NEXT:     AND_INT T23.X, T20.Z, literal.x,
7724; EG-NEXT:     MOV T23.Y, 0.0,
7725; EG-NEXT:     LSHR T24.Z, T20.W, literal.y,
7726; EG-NEXT:     AND_INT * T24.X, T20.W, literal.x,
7727; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7728; EG-NEXT:     MOV T24.Y, 0.0,
7729; EG-NEXT:     LSHR * T25.Z, T20.X, literal.x,
7730; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7731; EG-NEXT:     AND_INT T25.X, T20.X, literal.x,
7732; EG-NEXT:     MOV T25.Y, 0.0,
7733; EG-NEXT:     LSHR T20.Z, T20.Y, literal.y,
7734; EG-NEXT:     AND_INT * T20.X, T20.Y, literal.x,
7735; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7736; EG-NEXT:     MOV T20.Y, 0.0,
7737; EG-NEXT:     LSHR * T26.Z, T22.Z, literal.x,
7738; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7739; EG-NEXT:     AND_INT T26.X, T22.Z, literal.x,
7740; EG-NEXT:     MOV T26.Y, 0.0,
7741; EG-NEXT:     LSHR T27.Z, T22.W, literal.y,
7742; EG-NEXT:     AND_INT * T27.X, T22.W, literal.x,
7743; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7744; EG-NEXT:     MOV T27.Y, 0.0,
7745; EG-NEXT:     LSHR * T28.Z, T22.X, literal.x,
7746; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7747; EG-NEXT:     AND_INT T28.X, T22.X, literal.x,
7748; EG-NEXT:     MOV T28.Y, 0.0,
7749; EG-NEXT:     LSHR T22.Z, T22.Y, literal.y,
7750; EG-NEXT:     AND_INT * T22.X, T22.Y, literal.x,
7751; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7752; EG-NEXT:     MOV T22.Y, 0.0,
7753; EG-NEXT:     LSHR * T19.Z, T21.Z, literal.x,
7754; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7755; EG-NEXT:    ALU clause starting at 65:
7756; EG-NEXT:     AND_INT T19.X, T21.Z, literal.x,
7757; EG-NEXT:     MOV T19.Y, 0.0,
7758; EG-NEXT:     LSHR T30.Z, T21.W, literal.y,
7759; EG-NEXT:     AND_INT * T30.X, T21.W, literal.x,
7760; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7761; EG-NEXT:     MOV T30.Y, 0.0,
7762; EG-NEXT:     LSHR * T31.Z, T21.X, literal.x,
7763; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7764; EG-NEXT:     AND_INT T31.X, T21.X, literal.x,
7765; EG-NEXT:     MOV T31.Y, 0.0,
7766; EG-NEXT:     LSHR T21.Z, T21.Y, literal.y,
7767; EG-NEXT:     AND_INT * T21.X, T21.Y, literal.x,
7768; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7769; EG-NEXT:     MOV T21.Y, 0.0,
7770; EG-NEXT:     LSHR * T32.Z, T29.Z, literal.x,
7771; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7772; EG-NEXT:     AND_INT T32.X, T29.Z, literal.x,
7773; EG-NEXT:     MOV T32.Y, 0.0,
7774; EG-NEXT:     LSHR T33.Z, T29.W, literal.y,
7775; EG-NEXT:     AND_INT * T33.X, T29.W, literal.x,
7776; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7777; EG-NEXT:     MOV T33.Y, 0.0,
7778; EG-NEXT:     LSHR * T34.Z, T29.X, literal.x,
7779; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7780; EG-NEXT:     AND_INT T34.X, T29.X, literal.x,
7781; EG-NEXT:     MOV T34.Y, 0.0,
7782; EG-NEXT:     LSHR T29.Z, T29.Y, literal.y,
7783; EG-NEXT:     AND_INT * T29.X, T29.Y, literal.x,
7784; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7785; EG-NEXT:     MOV T29.Y, 0.0,
7786; EG-NEXT:     MOV T23.W, 0.0,
7787; EG-NEXT:     MOV * T24.W, 0.0,
7788; EG-NEXT:     MOV T25.W, 0.0,
7789; EG-NEXT:     MOV * T20.W, 0.0,
7790; EG-NEXT:     MOV T26.W, 0.0,
7791; EG-NEXT:     MOV * T27.W, 0.0,
7792; EG-NEXT:     MOV T28.W, 0.0,
7793; EG-NEXT:     MOV * T22.W, 0.0,
7794; EG-NEXT:     MOV T19.W, 0.0,
7795; EG-NEXT:     MOV * T30.W, 0.0,
7796; EG-NEXT:     MOV T31.W, 0.0,
7797; EG-NEXT:     MOV * T21.W, 0.0,
7798; EG-NEXT:     MOV T32.W, 0.0,
7799; EG-NEXT:     MOV * T33.W, 0.0,
7800; EG-NEXT:     MOV T34.W, 0.0,
7801; EG-NEXT:     MOV * T29.W, 0.0,
7802; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7803; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7804; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
7805; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.x,
7806; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7807; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7808; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
7809; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
7810; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7811; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7812; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7813; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7814; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7815; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
7816; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7817; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7818; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
7819; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7820; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7821; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
7822; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7823; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7824; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
7825; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7826; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7827; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
7828; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7829; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7830; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
7831; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7832; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7833; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
7834; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7835; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7836; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
7837; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7838; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7839; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
7840; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7841; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7842; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
7843; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7844; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7845; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
7846; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7847; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7848; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
7849; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7850;
7851; CM-LABEL: global_zextload_v32i16_to_v32i64:
7852; CM:       ; %bb.0:
7853; CM-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7854; CM-NEXT:    TEX 2 @22
7855; CM-NEXT:    ALU 33, @31, KC0[], KC1[]
7856; CM-NEXT:    TEX 0 @28
7857; CM-NEXT:    ALU 94, @65, KC0[CB0:0-32], KC1[]
7858; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T50.X
7859; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T49.X
7860; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T48.X
7861; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T26, T47.X
7862; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T46.X
7863; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T27, T45.X
7864; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T44.X
7865; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T29, T43.X
7866; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T42.X
7867; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T41.X
7868; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T40.X
7869; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T32, T39.X
7870; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T21, T38.X
7871; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T37.X
7872; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T34, T36.X
7873; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T22.X
7874; CM-NEXT:    CF_END
7875; CM-NEXT:    Fetch clause starting at 22:
7876; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
7877; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 32, #1
7878; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
7879; CM-NEXT:    Fetch clause starting at 28:
7880; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 48, #1
7881; CM-NEXT:    ALU clause starting at 30:
7882; CM-NEXT:     MOV * T19.X, KC0[2].Z,
7883; CM-NEXT:    ALU clause starting at 31:
7884; CM-NEXT:     LSHR * T23.Z, T20.Y, literal.x,
7885; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7886; CM-NEXT:     AND_INT T23.X, T20.Y, literal.x,
7887; CM-NEXT:     MOV T23.Y, 0.0,
7888; CM-NEXT:     LSHR * T24.Z, T20.X, literal.y,
7889; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7890; CM-NEXT:     AND_INT T24.X, T20.X, literal.x,
7891; CM-NEXT:     MOV T24.Y, 0.0,
7892; CM-NEXT:     LSHR * T25.Z, T20.W, literal.y,
7893; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7894; CM-NEXT:     AND_INT T25.X, T20.W, literal.x,
7895; CM-NEXT:     MOV T25.Y, 0.0,
7896; CM-NEXT:     LSHR * T26.Z, T20.Z, literal.y,
7897; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7898; CM-NEXT:     AND_INT T26.X, T20.Z, literal.x,
7899; CM-NEXT:     MOV T26.Y, 0.0,
7900; CM-NEXT:     LSHR * T20.Z, T22.Y, literal.y,
7901; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7902; CM-NEXT:     AND_INT T20.X, T22.Y, literal.x,
7903; CM-NEXT:     MOV T20.Y, 0.0,
7904; CM-NEXT:     LSHR * T27.Z, T22.X, literal.y,
7905; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7906; CM-NEXT:     AND_INT T27.X, T22.X, literal.x,
7907; CM-NEXT:     MOV T27.Y, 0.0,
7908; CM-NEXT:     LSHR * T28.Z, T22.W, literal.y,
7909; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7910; CM-NEXT:     AND_INT T28.X, T22.W, literal.x,
7911; CM-NEXT:     MOV T28.Y, 0.0,
7912; CM-NEXT:     LSHR * T29.Z, T22.Z, literal.y,
7913; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7914; CM-NEXT:     AND_INT T29.X, T22.Z, literal.x,
7915; CM-NEXT:     MOV T29.Y, 0.0,
7916; CM-NEXT:     LSHR * T19.Z, T21.Y, literal.y,
7917; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7918; CM-NEXT:    ALU clause starting at 65:
7919; CM-NEXT:     AND_INT T19.X, T21.Y, literal.x,
7920; CM-NEXT:     MOV T19.Y, 0.0,
7921; CM-NEXT:     LSHR * T30.Z, T21.X, literal.y,
7922; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7923; CM-NEXT:     AND_INT T30.X, T21.X, literal.x,
7924; CM-NEXT:     MOV T30.Y, 0.0,
7925; CM-NEXT:     LSHR * T31.Z, T21.W, literal.y,
7926; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7927; CM-NEXT:     AND_INT T31.X, T21.W, literal.x,
7928; CM-NEXT:     MOV T31.Y, 0.0,
7929; CM-NEXT:     LSHR * T32.Z, T21.Z, literal.y,
7930; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7931; CM-NEXT:     AND_INT T32.X, T21.Z, literal.x,
7932; CM-NEXT:     MOV T32.Y, 0.0,
7933; CM-NEXT:     LSHR * T21.Z, T22.Y, literal.y,
7934; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7935; CM-NEXT:     AND_INT T21.X, T22.Y, literal.x,
7936; CM-NEXT:     MOV T21.Y, 0.0,
7937; CM-NEXT:     LSHR * T33.Z, T22.X, literal.y,
7938; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7939; CM-NEXT:     AND_INT T33.X, T22.X, literal.x,
7940; CM-NEXT:     MOV T33.Y, 0.0,
7941; CM-NEXT:     LSHR * T34.Z, T22.W, literal.y,
7942; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7943; CM-NEXT:     AND_INT T34.X, T22.W, literal.x,
7944; CM-NEXT:     MOV T34.Y, 0.0,
7945; CM-NEXT:     LSHR * T35.Z, T22.Z, literal.y,
7946; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7947; CM-NEXT:     AND_INT T35.X, T22.Z, literal.x,
7948; CM-NEXT:     MOV T35.Y, 0.0,
7949; CM-NEXT:     MOV * T23.W, 0.0,
7950; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
7951; CM-NEXT:     MOV * T24.W, 0.0,
7952; CM-NEXT:     MOV * T25.W, 0.0,
7953; CM-NEXT:     MOV * T26.W, 0.0,
7954; CM-NEXT:     MOV * T20.W, 0.0,
7955; CM-NEXT:     MOV * T27.W, 0.0,
7956; CM-NEXT:     MOV * T28.W, 0.0,
7957; CM-NEXT:     MOV * T29.W, 0.0,
7958; CM-NEXT:     MOV * T19.W, 0.0,
7959; CM-NEXT:     MOV * T30.W, 0.0,
7960; CM-NEXT:     MOV * T31.W, 0.0,
7961; CM-NEXT:     MOV * T32.W, 0.0,
7962; CM-NEXT:     MOV * T21.W, 0.0,
7963; CM-NEXT:     MOV * T33.W, 0.0,
7964; CM-NEXT:     MOV * T34.W, 0.0,
7965; CM-NEXT:     MOV * T35.W, 0.0,
7966; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7967; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
7968; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
7969; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7970; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7971; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
7972; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7973; CM-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7974; CM-NEXT:     LSHR T37.X, PV.W, literal.x,
7975; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7976; CM-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7977; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
7978; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7979; CM-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7980; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
7981; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7982; CM-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7983; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
7984; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7985; CM-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7986; CM-NEXT:     LSHR T41.X, PV.W, literal.x,
7987; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7988; CM-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7989; CM-NEXT:     LSHR T42.X, PV.W, literal.x,
7990; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7991; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7992; CM-NEXT:     LSHR T43.X, PV.W, literal.x,
7993; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7994; CM-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7995; CM-NEXT:     LSHR T44.X, PV.W, literal.x,
7996; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7997; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7998; CM-NEXT:     LSHR T45.X, PV.W, literal.x,
7999; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8000; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8001; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
8002; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8003; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8004; CM-NEXT:     LSHR T47.X, PV.W, literal.x,
8005; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8006; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8007; CM-NEXT:     LSHR * T48.X, PV.W, literal.x,
8008; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8009; CM-NEXT:     LSHR T49.X, KC0[2].Y, literal.x,
8010; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8011; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8012; CM-NEXT:     LSHR * T50.X, PV.W, literal.x,
8013; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8014  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
8015  %ext = zext <32 x i16> %load to <32 x i64>
8016  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
8017  ret void
8018}
8019
8020define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
8021; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64:
8022; GCN-NOHSA-SI:       ; %bb.0:
8023; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
8024; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
8025; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
8026; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
8027; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
8028; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
8029; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
8030; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
8031; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
8032; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32
8033; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
8034; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
8035; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0
8036; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
8037; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
8038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, v15
8039; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v16, 0, 16
8040; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[14:15], 48
8041; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8042; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240
8043; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8044; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[12:13], 48
8045; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v13, 0, 16
8046; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8047; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208
8048; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(4)
8049; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v3
8050; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8051; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v13, 0, 16
8052; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[2:3], 48
8053; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8054; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:176
8055; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8056; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[0:1], 48
8057; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8058; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8059; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:144
8060; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(4)
8061; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v7
8062; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8063; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8064; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[6:7], 48
8065; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8066; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:112
8067; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8068; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[4:5], 48
8069; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v5, 0, 16
8070; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8071; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
8072; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v11
8073; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8074; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8075; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[10:11], 48
8076; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8077; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
8078; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8079; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[8:9], 48
8080; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v9, 0, 16
8081; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8082; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16
8083; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
8084; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8085; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8086; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v14, 0, 16
8087; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8088; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8089; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:224
8090; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v12
8091; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
8092; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v11, v12, 0, 16
8093; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8094; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v1, 0, 16
8095; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
8096; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8097; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8098; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:192
8099; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v10
8100; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8101; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v3, 0, 16
8102; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v11, v2, 0, 16
8103; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8104; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8105; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:160
8106; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
8107; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v3, v1, 0, 16
8108; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v1, v8, 0, 16
8109; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v5, v10, 0, 16
8110; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v7, 0, 16
8111; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8112; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v11, v9, 0, 16
8113; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v9, v4, 0, 16
8114; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
8115; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v6, 0, 16
8116; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v2, 0, 16
8117; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
8118; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v0, 0, 16
8119; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v2, 0, 16
8120; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
8121; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
8122; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
8123; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8124; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
8125; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
8126; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8127; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8128; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8129; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
8130; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
8131; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96
8132; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
8133; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
8134; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0
8135; GCN-NOHSA-SI-NEXT:    s_endpgm
8136;
8137; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64:
8138; GCN-HSA:       ; %bb.0:
8139; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
8140; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8141; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
8142; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
8143; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[0:1]
8144; GCN-HSA-NEXT:    s_add_u32 s4, s2, 48
8145; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
8146; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
8147; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
8148; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
8149; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
8150; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
8151; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
8152; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
8153; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
8154; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
8155; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
8156; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
8157; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
8158; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
8159; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
8160; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8161; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
8162; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
8163; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s5
8164; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s4
8165; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
8166; GCN-HSA-NEXT:    v_ashr_i64 v[18:19], v[8:9], 48
8167; GCN-HSA-NEXT:    v_bfe_i32 v16, v9, 0, 16
8168; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8169; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
8170; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
8171; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
8172; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
8173; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8174; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xd0
8175; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
8176; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xb0
8177; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
8178; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0x90
8179; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
8180; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x70
8181; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
8182; GCN-HSA-NEXT:    v_mov_b32_e32 v9, v11
8183; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0x50
8184; GCN-HSA-NEXT:    v_bfe_i32 v16, v9, 0, 16
8185; GCN-HSA-NEXT:    v_ashr_i64 v[18:19], v[10:11], 48
8186; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
8187; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8188; GCN-HSA-NEXT:    s_add_u32 s14, s0, 32
8189; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v10
8190; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
8191; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
8192; GCN-HSA-NEXT:    v_bfe_i32 v18, v9, 0, 16
8193; GCN-HSA-NEXT:    v_bfe_i32 v16, v10, 0, 16
8194; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s14
8195; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8196; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
8197; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s15
8198; GCN-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[16:19]
8199; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
8200; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
8201; GCN-HSA-NEXT:    v_bfe_i32 v8, v8, 0, 16
8202; GCN-HSA-NEXT:    v_bfe_i32 v10, v9, 0, 16
8203; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
8204; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8205; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
8206; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
8207; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s5
8208; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
8209; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[0:1], 48
8210; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
8211; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s4
8212; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8213; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v3
8214; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
8215; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
8216; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
8217; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
8218; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[2:3], 48
8219; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8220; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[8:11]
8221; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s9
8222; GCN-HSA-NEXT:    s_waitcnt vmcnt(7)
8223; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[4:5], 48
8224; GCN-HSA-NEXT:    v_bfe_i32 v8, v5, 0, 16
8225; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s8
8226; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8227; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v7
8228; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s7
8229; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[8:11]
8230; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
8231; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
8232; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[6:7], 48
8233; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8234; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
8235; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s13
8236; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
8237; GCN-HSA-NEXT:    v_ashr_i64 v[9:10], v[12:13], 48
8238; GCN-HSA-NEXT:    v_bfe_i32 v7, v13, 0, 16
8239; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s12
8240; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8241; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v15
8242; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s11
8243; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[7:10]
8244; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s10
8245; GCN-HSA-NEXT:    v_bfe_i32 v7, v3, 0, 16
8246; GCN-HSA-NEXT:    v_ashr_i64 v[9:10], v[14:15], 48
8247; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
8248; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8249; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
8250; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[7:10]
8251; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8252; GCN-HSA-NEXT:    v_bfe_i32 v7, v2, 0, 16
8253; GCN-HSA-NEXT:    v_bfe_i32 v9, v1, 0, 16
8254; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s2
8255; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
8256; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
8257; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8258; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
8259; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8260; GCN-HSA-NEXT:    flat_store_dwordx4 v[1:2], v[7:10]
8261; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
8262; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s3
8263; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s2
8264; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
8265; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8266; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v4
8267; GCN-HSA-NEXT:    v_bfe_i32 v11, v4, 0, 16
8268; GCN-HSA-NEXT:    v_bfe_i32 v17, v5, 0, 16
8269; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
8270; GCN-HSA-NEXT:    v_bfe_i32 v15, v6, 0, 16
8271; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
8272; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
8273; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
8274; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8275; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
8276; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8277; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
8278; GCN-HSA-NEXT:    v_bfe_i32 v2, v3, 0, 16
8279; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[15:18]
8280; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
8281; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8282; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8283; GCN-HSA-NEXT:    v_bfe_i32 v13, v13, 0, 16
8284; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
8285; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
8286; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
8287; GCN-HSA-NEXT:    flat_store_dwordx4 v[7:8], v[0:3]
8288; GCN-HSA-NEXT:    v_bfe_i32 v7, v14, 0, 16
8289; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v12
8290; GCN-HSA-NEXT:    v_bfe_i32 v0, v12, 0, 16
8291; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8292; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8293; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8294; GCN-HSA-NEXT:    v_bfe_i32 v9, v9, 0, 16
8295; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[11:14]
8296; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
8297; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
8298; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8299; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
8300; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
8301; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
8302; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
8303; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[7:10]
8304; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
8305; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8306; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8307; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
8308; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8309; GCN-HSA-NEXT:    s_endpgm
8310;
8311; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64:
8312; GCN-NOHSA-VI:       ; %bb.0:
8313; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
8314; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
8315; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
8316; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
8317; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
8318; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
8319; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
8320; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
8321; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
8322; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32
8323; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
8324; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0
8325; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
8326; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
8327; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
8328; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[18:19], 48, v[12:13]
8329; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v13, 0, 16
8330; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8331; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:208
8332; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v14, 0, 16
8333; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
8334; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[18:19], 48, v[0:1]
8335; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v1, 0, 16
8336; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8337; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144
8338; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v15
8339; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
8340; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[18:19], 48, v[4:5]
8341; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v5, 0, 16
8342; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8343; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
8344; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
8345; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v5, v10, 0, 16
8346; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[18:19], 48, v[8:9]
8347; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v9, 0, 16
8348; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8349; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
8350; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
8351; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v1, 0, 16
8352; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[18:19], 48, v[14:15]
8353; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8354; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v3
8355; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240
8356; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8357; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[17:18], 48, v[2:3]
8358; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v7
8360; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:176
8361; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
8362; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8363; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[17:18], 48, v[6:7]
8364; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v11
8366; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:112
8367; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v10
8368; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8369; GCN-NOHSA-VI-NEXT:    v_ashrrev_i64 v[17:18], 48, v[10:11]
8370; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8371; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
8372; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
8373; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8374; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8375; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8376; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v12
8377; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:224
8378; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v12, 0, 16
8379; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v1, 0, 16
8380; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8381; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8382; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:192
8383; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
8384; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v2, 0, 16
8385; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
8386; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v3, 0, 16
8387; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v15, v2, 0, 16
8388; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
8389; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8390; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8391; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v17, v0, 0, 16
8392; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v19, v2, 0, 16
8393; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:160
8394; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v3, v1, 0, 16
8395; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v1, v8, 0, 16
8396; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v7, v7, 0, 16
8397; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v9, 0, 16
8398; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v4, 0, 16
8399; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v6, 0, 16
8400; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
8401; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
8402; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
8403; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
8404; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
8405; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8406; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
8407; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8408; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8409; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8410; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
8411; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96
8412; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
8413; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
8414; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0
8415; GCN-NOHSA-VI-NEXT:    s_endpgm
8416;
8417; EG-LABEL: global_sextload_v32i16_to_v32i64:
8418; EG:       ; %bb.0:
8419; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
8420; EG-NEXT:    TEX 0 @22
8421; EG-NEXT:    ALU 56, @31, KC0[CB0:0-32], KC1[]
8422; EG-NEXT:    TEX 2 @24
8423; EG-NEXT:    ALU 74, @88, KC0[CB0:0-32], KC1[]
8424; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
8425; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
8426; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0
8427; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0
8428; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
8429; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
8430; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0
8431; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0
8432; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
8433; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
8434; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0
8435; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0
8436; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
8437; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
8438; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
8439; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
8440; EG-NEXT:    CF_END
8441; EG-NEXT:    Fetch clause starting at 22:
8442; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
8443; EG-NEXT:    Fetch clause starting at 24:
8444; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
8445; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
8446; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
8447; EG-NEXT:    ALU clause starting at 30:
8448; EG-NEXT:     MOV * T19.X, KC0[2].Z,
8449; EG-NEXT:    ALU clause starting at 31:
8450; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8451; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8452; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
8453; EG-NEXT:     LSHR * T22.X, KC0[2].Y, literal.x,
8454; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8455; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8456; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
8457; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
8458; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8459; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8460; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
8461; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8462; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8463; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
8464; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8465; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8466; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
8467; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8468; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8469; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
8470; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8471; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8472; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
8473; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8474; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8475; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
8476; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8477; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8478; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
8479; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8480; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8481; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
8482; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8483; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8484; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
8485; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8486; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8487; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
8488; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8489; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8490; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
8491; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
8492; EG-NEXT:     ASHR * T35.W, T20.Y, literal.z,
8493; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
8494; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8495; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
8496; EG-NEXT:     ASHR T35.Z, T20.Y, literal.y,
8497; EG-NEXT:     ASHR * T37.W, T20.X, literal.z,
8498; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8499; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8500; EG-NEXT:     BFE_INT T35.X, T20.Y, 0.0, literal.x,
8501; EG-NEXT:     ASHR * T37.Z, T20.X, literal.x,
8502; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8503; EG-NEXT:     BFE_INT T37.X, T20.X, 0.0, literal.x,
8504; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
8505; EG-NEXT:     ASHR * T19.W, T20.W, literal.y,
8506; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8507; EG-NEXT:    ALU clause starting at 88:
8508; EG-NEXT:     ASHR T19.Z, T20.W, literal.x,
8509; EG-NEXT:     ASHR * T41.W, T20.Z, literal.y,
8510; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8511; EG-NEXT:     BFE_INT T19.X, T20.W, 0.0, literal.x,
8512; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
8513; EG-NEXT:     ASHR T41.Z, T20.Z, literal.x,
8514; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
8515; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8516; EG-NEXT:     BFE_INT T41.X, T20.Z, 0.0, literal.x,
8517; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
8518; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
8519; EG-NEXT:     ASHR * T42.W, T40.X, literal.y,
8520; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8521; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
8522; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
8523; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
8524; EG-NEXT:     ASHR * T43.W, T40.W, literal.y,
8525; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8526; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
8527; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
8528; EG-NEXT:     ASHR T43.Z, T40.W, literal.x,
8529; EG-NEXT:     ASHR * T44.W, T40.Z, literal.y,
8530; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8531; EG-NEXT:     BFE_INT T43.X, T40.W, 0.0, literal.x,
8532; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
8533; EG-NEXT:     ASHR T44.Z, T40.Z, literal.x,
8534; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
8535; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8536; EG-NEXT:     BFE_INT T44.X, T40.Z, 0.0, literal.x,
8537; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
8538; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
8539; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
8540; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8541; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
8542; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
8543; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
8544; EG-NEXT:     ASHR * T46.W, T39.W, literal.y,
8545; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8546; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
8547; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
8548; EG-NEXT:     ASHR T46.Z, T39.W, literal.x,
8549; EG-NEXT:     ASHR * T47.W, T39.Z, literal.y,
8550; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8551; EG-NEXT:     BFE_INT T46.X, T39.W, 0.0, literal.x,
8552; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
8553; EG-NEXT:     ASHR T47.Z, T39.Z, literal.x,
8554; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
8555; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8556; EG-NEXT:     BFE_INT T47.X, T39.Z, 0.0, literal.x,
8557; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
8558; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
8559; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
8560; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8561; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
8562; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
8563; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
8564; EG-NEXT:     ASHR * T49.W, T38.W, literal.y,
8565; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8566; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
8567; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
8568; EG-NEXT:     ASHR T49.Z, T38.W, literal.x,
8569; EG-NEXT:     ASHR * T50.W, T38.Z, literal.y,
8570; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8571; EG-NEXT:     BFE_INT T49.X, T38.W, 0.0, literal.x,
8572; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
8573; EG-NEXT:     ASHR * T50.Z, T38.Z, literal.x,
8574; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8575; EG-NEXT:     BFE_INT T50.X, T38.Z, 0.0, literal.x,
8576; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
8577; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
8578; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8579; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
8580; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
8581; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
8582; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8583;
8584; CM-LABEL: global_sextload_v32i16_to_v32i64:
8585; CM:       ; %bb.0:
8586; CM-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
8587; CM-NEXT:    TEX 0 @22
8588; CM-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
8589; CM-NEXT:    TEX 2 @24
8590; CM-NEXT:    ALU 73, @87, KC0[CB0:0-32], KC1[]
8591; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T38, T50.X
8592; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T49, T36.X
8593; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T48, T34.X
8594; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T47, T33.X
8595; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T39, T32.X
8596; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T46, T31.X
8597; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T30.X
8598; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T29.X
8599; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T28.X
8600; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T43, T27.X
8601; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T26.X
8602; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T25.X
8603; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T24.X
8604; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T23.X
8605; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T37, T22.X
8606; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T21.X
8607; CM-NEXT:    CF_END
8608; CM-NEXT:    Fetch clause starting at 22:
8609; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
8610; CM-NEXT:    Fetch clause starting at 24:
8611; CM-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 0, #1
8612; CM-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 16, #1
8613; CM-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 32, #1
8614; CM-NEXT:    ALU clause starting at 30:
8615; CM-NEXT:     MOV * T19.X, KC0[2].Z,
8616; CM-NEXT:    ALU clause starting at 31:
8617; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8618; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
8619; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
8620; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8621; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
8622; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
8623; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8624; CM-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8625; CM-NEXT:     LSHR T23.X, PV.W, literal.x,
8626; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8627; CM-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8628; CM-NEXT:     LSHR T24.X, PV.W, literal.x,
8629; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8630; CM-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8631; CM-NEXT:     LSHR T25.X, PV.W, literal.x,
8632; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8633; CM-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8634; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
8635; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8636; CM-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8637; CM-NEXT:     LSHR T27.X, PV.W, literal.x,
8638; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8639; CM-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8640; CM-NEXT:     LSHR T28.X, PV.W, literal.x,
8641; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8642; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8643; CM-NEXT:     LSHR T29.X, PV.W, literal.x,
8644; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8645; CM-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8646; CM-NEXT:     LSHR T30.X, PV.W, literal.x,
8647; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8648; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8649; CM-NEXT:     LSHR T31.X, PV.W, literal.x,
8650; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8651; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8652; CM-NEXT:     LSHR T32.X, PV.W, literal.x,
8653; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8654; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8655; CM-NEXT:     LSHR T33.X, PV.W, literal.x,
8656; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8657; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8658; CM-NEXT:     LSHR T34.X, PV.W, literal.x,
8659; CM-NEXT:     ASHR * T35.W, T20.Z, literal.y,
8660; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8661; CM-NEXT:     LSHR T36.X, KC0[2].Y, literal.x,
8662; CM-NEXT:     ASHR T35.Z, T20.Z, literal.y,
8663; CM-NEXT:     ASHR * T37.W, T20.W, literal.z,
8664; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8665; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8666; CM-NEXT:     BFE_INT T35.X, T20.Z, 0.0, literal.x,
8667; CM-NEXT:     ASHR * T37.Z, T20.W, literal.x,
8668; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8669; CM-NEXT:     BFE_INT T37.X, T20.W, 0.0, literal.x,
8670; CM-NEXT:     ASHR T35.Y, PV.X, literal.y,
8671; CM-NEXT:     ASHR * T19.W, T20.X, literal.y,
8672; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8673; CM-NEXT:    ALU clause starting at 87:
8674; CM-NEXT:     ASHR T19.Z, T20.X, literal.x,
8675; CM-NEXT:     ASHR * T20.W, T20.Y, literal.y,
8676; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8677; CM-NEXT:     BFE_INT T19.X, T20.X, 0.0, literal.x,
8678; CM-NEXT:     ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212
8679; CM-NEXT:     ASHR T20.Z, T20.Y, literal.x,
8680; CM-NEXT:     ASHR * T41.W, T40.Z, literal.y,
8681; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8682; CM-NEXT:     BFE_INT T20.X, T20.Y, 0.0, literal.x,
8683; CM-NEXT:     ASHR T19.Y, PV.X, literal.y,
8684; CM-NEXT:     ASHR T41.Z, T40.Z, literal.x,
8685; CM-NEXT:     ASHR * T42.W, T40.W, literal.y,
8686; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8687; CM-NEXT:     BFE_INT T41.X, T40.Z, 0.0, literal.x,
8688; CM-NEXT:     ASHR T20.Y, PV.X, literal.y,
8689; CM-NEXT:     ASHR T42.Z, T40.W, literal.x,
8690; CM-NEXT:     ASHR * T43.W, T40.X, literal.y,
8691; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8692; CM-NEXT:     BFE_INT T42.X, T40.W, 0.0, literal.x,
8693; CM-NEXT:     ASHR T41.Y, PV.X, literal.y,
8694; CM-NEXT:     ASHR T43.Z, T40.X, literal.x,
8695; CM-NEXT:     ASHR * T40.W, T40.Y, literal.y,
8696; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8697; CM-NEXT:     BFE_INT T43.X, T40.X, 0.0, literal.x,
8698; CM-NEXT:     ASHR T42.Y, PV.X, literal.y,
8699; CM-NEXT:     ASHR T40.Z, T40.Y, literal.x,
8700; CM-NEXT:     ASHR * T44.W, T39.Z, literal.y,
8701; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8702; CM-NEXT:     BFE_INT T40.X, T40.Y, 0.0, literal.x,
8703; CM-NEXT:     ASHR T43.Y, PV.X, literal.y,
8704; CM-NEXT:     ASHR T44.Z, T39.Z, literal.x,
8705; CM-NEXT:     ASHR * T45.W, T39.W, literal.y,
8706; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8707; CM-NEXT:     BFE_INT T44.X, T39.Z, 0.0, literal.x,
8708; CM-NEXT:     ASHR T40.Y, PV.X, literal.y,
8709; CM-NEXT:     ASHR T45.Z, T39.W, literal.x,
8710; CM-NEXT:     ASHR * T46.W, T39.X, literal.y,
8711; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8712; CM-NEXT:     BFE_INT T45.X, T39.W, 0.0, literal.x,
8713; CM-NEXT:     ASHR T44.Y, PV.X, literal.y,
8714; CM-NEXT:     ASHR T46.Z, T39.X, literal.x,
8715; CM-NEXT:     ASHR * T39.W, T39.Y, literal.y,
8716; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8717; CM-NEXT:     BFE_INT T46.X, T39.X, 0.0, literal.x,
8718; CM-NEXT:     ASHR T45.Y, PV.X, literal.y,
8719; CM-NEXT:     ASHR T39.Z, T39.Y, literal.x,
8720; CM-NEXT:     ASHR * T47.W, T38.Z, literal.y,
8721; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8722; CM-NEXT:     BFE_INT T39.X, T39.Y, 0.0, literal.x,
8723; CM-NEXT:     ASHR T46.Y, PV.X, literal.y,
8724; CM-NEXT:     ASHR T47.Z, T38.Z, literal.x,
8725; CM-NEXT:     ASHR * T48.W, T38.W, literal.y,
8726; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8727; CM-NEXT:     BFE_INT T47.X, T38.Z, 0.0, literal.x,
8728; CM-NEXT:     ASHR T39.Y, PV.X, literal.y,
8729; CM-NEXT:     ASHR T48.Z, T38.W, literal.x,
8730; CM-NEXT:     ASHR * T49.W, T38.X, literal.y,
8731; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8732; CM-NEXT:     BFE_INT T48.X, T38.W, 0.0, literal.x,
8733; CM-NEXT:     ASHR T47.Y, PV.X, literal.y,
8734; CM-NEXT:     ASHR T49.Z, T38.X, literal.x,
8735; CM-NEXT:     ASHR * T38.W, T38.Y, literal.y,
8736; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8737; CM-NEXT:     BFE_INT T49.X, T38.X, 0.0, literal.x,
8738; CM-NEXT:     ASHR T48.Y, PV.X, literal.y,
8739; CM-NEXT:     ASHR * T38.Z, T38.Y, literal.x,
8740; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8741; CM-NEXT:     BFE_INT T38.X, T38.Y, 0.0, literal.x,
8742; CM-NEXT:     ASHR T49.Y, PV.X, literal.y,
8743; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8744; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8745; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
8746; CM-NEXT:     ASHR * T38.Y, PV.X, literal.y,
8747; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8748  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
8749  %ext = sext <32 x i16> %load to <32 x i64>
8750  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
8751  ret void
8752}
8753
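; NOTE(review): the two v64i16 -> v64i64 kernels below are disabled. Presumably this is related to the spilling problem described in the FIXME at the top of this file - TODO confirm before re-enabling them and regenerating the checks.
8754; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {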
8755;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
8756;   %ext = zext <64 x i16> %load to <64 x i64>
8757;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
8758;   ret void
8759; }
8760
8761; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
8762;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
8763;   %ext = sext <64 x i16> %load to <64 x i64>
8764;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
8765;   ret void
8766; }
8767
; Attribute group #0: applies `nounwind` to every function that references #0.
; Within this part of the file it is referenced only by the commented-out
; v64i16 kernels above; any other users are outside this view.
8768attributes #0 = { nounwind }
8769