1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
5
; Plain 1D sample taking a single half coordinate %s.
; The expected output on both the gfx9 and gfx10 check sets is one image_sample
; with the a16 modifier whose address operand is v0 alone, with no packing
; instructions, wrapped in an s_wqm / exec-restore sequence.
6define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
7; GFX9-LABEL: sample_1d:
8; GFX9:       ; %bb.0: ; %main_body
9; GFX9-NEXT:    s_mov_b64 s[12:13], exec
10; GFX9-NEXT:    s_wqm_b64 exec, exec
11; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
12; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
13; GFX9-NEXT:    s_waitcnt vmcnt(0)
14; GFX9-NEXT:    ; return to shader part epilog
15;
16; GFX10-LABEL: sample_1d:
17; GFX10:       ; %bb.0: ; %main_body
18; GFX10-NEXT:    s_mov_b32 s12, exec_lo
19; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
20; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
21; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
22; GFX10-NEXT:    s_waitcnt vmcnt(0)
23; GFX10-NEXT:    ; return to shader part epilog
24main_body:
25  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
26  ret <4 x float> %v
27}
28
; 2D sample taking two half coordinates %s and %t.
; Both check sets expect the two inputs in v0 and v1 to be merged into v0
; (v_and_b32 with 0xffff, then v_lshl_or_b32 by 16) so that the a16
; image_sample consumes a single address register.
29define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
30; GFX9-LABEL: sample_2d:
31; GFX9:       ; %bb.0: ; %main_body
32; GFX9-NEXT:    s_mov_b64 s[12:13], exec
33; GFX9-NEXT:    s_wqm_b64 exec, exec
34; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
35; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
36; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
37; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
38; GFX9-NEXT:    s_waitcnt vmcnt(0)
39; GFX9-NEXT:    ; return to shader part epilog
40;
41; GFX10-LABEL: sample_2d:
42; GFX10:       ; %bb.0: ; %main_body
43; GFX10-NEXT:    s_mov_b32 s12, exec_lo
44; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
45; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
46; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
47; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
48; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
49; GFX10-NEXT:    s_waitcnt vmcnt(0)
50; GFX10-NEXT:    ; return to shader part epilog
51main_body:
52  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
53  ret <4 x float> %v
54}
55
; 3D sample taking three half coordinates %s, %t and %r.
; Both check sets expect the inputs in v0 and v1 to be merged into v1 and the
; a16 image_sample to read the register pair v[1:2] (v2 is left untouched,
; presumably still holding %r).
56define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
57; GFX9-LABEL: sample_3d:
58; GFX9:       ; %bb.0: ; %main_body
59; GFX9-NEXT:    s_mov_b64 s[12:13], exec
60; GFX9-NEXT:    s_wqm_b64 exec, exec
61; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
62; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
63; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
64; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
65; GFX9-NEXT:    s_waitcnt vmcnt(0)
66; GFX9-NEXT:    ; return to shader part epilog
67;
68; GFX10-LABEL: sample_3d:
69; GFX10:       ; %bb.0: ; %main_body
70; GFX10-NEXT:    s_mov_b32 s12, exec_lo
71; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
72; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
73; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
74; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
75; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
76; GFX10-NEXT:    s_waitcnt vmcnt(0)
77; GFX10-NEXT:    ; return to shader part epilog
78main_body:
79  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
80  ret <4 x float> %v
81}
82
; Cube sample taking half coordinates %s, %t and %face.
; The packing and v[1:2] address pair match the 3D case. The gfx9 checks
; additionally expect the da modifier after a16, while the gfx10 checks expect
; the SQ_RSRC_IMG_CUBE dim operand instead.
83define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
84; GFX9-LABEL: sample_cube:
85; GFX9:       ; %bb.0: ; %main_body
86; GFX9-NEXT:    s_mov_b64 s[12:13], exec
87; GFX9-NEXT:    s_wqm_b64 exec, exec
88; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
89; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
90; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
91; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
92; GFX9-NEXT:    s_waitcnt vmcnt(0)
93; GFX9-NEXT:    ; return to shader part epilog
94;
95; GFX10-LABEL: sample_cube:
96; GFX10:       ; %bb.0: ; %main_body
97; GFX10-NEXT:    s_mov_b32 s12, exec_lo
98; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
99; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
100; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
101; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
102; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
103; GFX10-NEXT:    s_waitcnt vmcnt(0)
104; GFX10-NEXT:    ; return to shader part epilog
105main_body:
106  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
107  ret <4 x float> %v
108}
109
; 1D-array sample taking half %s plus a half %slice index.
; Both check sets expect the two inputs merged into v0 for a single-register
; a16 address. The gfx9 checks expect the da modifier and the gfx10 checks
; expect the SQ_RSRC_IMG_1D_ARRAY dim operand.
110define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
111; GFX9-LABEL: sample_1darray:
112; GFX9:       ; %bb.0: ; %main_body
113; GFX9-NEXT:    s_mov_b64 s[12:13], exec
114; GFX9-NEXT:    s_wqm_b64 exec, exec
115; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
116; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
117; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
118; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
119; GFX9-NEXT:    s_waitcnt vmcnt(0)
120; GFX9-NEXT:    ; return to shader part epilog
121;
122; GFX10-LABEL: sample_1darray:
123; GFX10:       ; %bb.0: ; %main_body
124; GFX10-NEXT:    s_mov_b32 s12, exec_lo
125; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
126; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
127; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
128; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
129; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
130; GFX10-NEXT:    s_waitcnt vmcnt(0)
131; GFX10-NEXT:    ; return to shader part epilog
132main_body:
133  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
134  ret <4 x float> %v
135}
136
; 2D-array sample taking half %s, %t and a half %slice index.
; Both check sets expect v0 and v1 merged into v1 and the a16 image_sample to
; read v[1:2]. The gfx9 checks expect the da modifier and the gfx10 checks
; expect the SQ_RSRC_IMG_2D_ARRAY dim operand.
137define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
138; GFX9-LABEL: sample_2darray:
139; GFX9:       ; %bb.0: ; %main_body
140; GFX9-NEXT:    s_mov_b64 s[12:13], exec
141; GFX9-NEXT:    s_wqm_b64 exec, exec
142; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
143; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
144; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
145; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
146; GFX9-NEXT:    s_waitcnt vmcnt(0)
147; GFX9-NEXT:    ; return to shader part epilog
148;
149; GFX10-LABEL: sample_2darray:
150; GFX10:       ; %bb.0: ; %main_body
151; GFX10-NEXT:    s_mov_b32 s12, exec_lo
152; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
153; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
154; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
155; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
156; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
157; GFX10-NEXT:    s_waitcnt vmcnt(0)
158; GFX10-NEXT:    ; return to shader part epilog
159main_body:
160  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
161  ret <4 x float> %v
162}
163
; Depth-compare 1D sample taking a float %zcompare followed by a half %s.
; Both check sets expect image_sample_c with the a16 modifier reading v[0:1]
; directly, with no packing instructions before it.
164define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
165; GFX9-LABEL: sample_c_1d:
166; GFX9:       ; %bb.0: ; %main_body
167; GFX9-NEXT:    s_mov_b64 s[12:13], exec
168; GFX9-NEXT:    s_wqm_b64 exec, exec
169; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
170; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
171; GFX9-NEXT:    s_waitcnt vmcnt(0)
172; GFX9-NEXT:    ; return to shader part epilog
173;
174; GFX10-LABEL: sample_c_1d:
175; GFX10:       ; %bb.0: ; %main_body
176; GFX10-NEXT:    s_mov_b32 s12, exec_lo
177; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
178; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
179; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
180; GFX10-NEXT:    s_waitcnt vmcnt(0)
181; GFX10-NEXT:    ; return to shader part epilog
182main_body:
183  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
184  ret <4 x float> %v
185}
186
; Depth-compare 2D sample taking a float %zcompare and half %s, %t.
; Both check sets expect v1 and v2 to be merged into v1, leaving v0 untouched,
; so that image_sample_c with a16 reads v[0:1].
187define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
188; GFX9-LABEL: sample_c_2d:
189; GFX9:       ; %bb.0: ; %main_body
190; GFX9-NEXT:    s_mov_b64 s[12:13], exec
191; GFX9-NEXT:    s_wqm_b64 exec, exec
192; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
193; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
194; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
195; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
196; GFX9-NEXT:    s_waitcnt vmcnt(0)
197; GFX9-NEXT:    ; return to shader part epilog
198;
199; GFX10-LABEL: sample_c_2d:
200; GFX10:       ; %bb.0: ; %main_body
201; GFX10-NEXT:    s_mov_b32 s12, exec_lo
202; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
203; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
204; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
205; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
206; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
207; GFX10-NEXT:    s_waitcnt vmcnt(0)
208; GFX10-NEXT:    ; return to shader part epilog
209main_body:
210  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
211  ret <4 x float> %v
212}
213
; 1D sample with an LOD clamp, taking half %s and half %clamp.
; Both check sets expect the two inputs merged into v0 and image_sample_cl
; with a16 to read that single register.
214define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
215; GFX9-LABEL: sample_cl_1d:
216; GFX9:       ; %bb.0: ; %main_body
217; GFX9-NEXT:    s_mov_b64 s[12:13], exec
218; GFX9-NEXT:    s_wqm_b64 exec, exec
219; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
220; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
221; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
222; GFX9-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
223; GFX9-NEXT:    s_waitcnt vmcnt(0)
224; GFX9-NEXT:    ; return to shader part epilog
225;
226; GFX10-LABEL: sample_cl_1d:
227; GFX10:       ; %bb.0: ; %main_body
228; GFX10-NEXT:    s_mov_b32 s12, exec_lo
229; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
230; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
231; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
232; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
233; GFX10-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
234; GFX10-NEXT:    s_waitcnt vmcnt(0)
235; GFX10-NEXT:    ; return to shader part epilog
236main_body:
237  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
238  ret <4 x float> %v
239}
240
; 2D sample with an LOD clamp, taking half %s, %t and %clamp.
; Both check sets expect v0 and v1 merged into v1 and image_sample_cl with a16
; to read v[1:2].
241define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
242; GFX9-LABEL: sample_cl_2d:
243; GFX9:       ; %bb.0: ; %main_body
244; GFX9-NEXT:    s_mov_b64 s[12:13], exec
245; GFX9-NEXT:    s_wqm_b64 exec, exec
246; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
247; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
248; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
249; GFX9-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
250; GFX9-NEXT:    s_waitcnt vmcnt(0)
251; GFX9-NEXT:    ; return to shader part epilog
252;
253; GFX10-LABEL: sample_cl_2d:
254; GFX10:       ; %bb.0: ; %main_body
255; GFX10-NEXT:    s_mov_b32 s12, exec_lo
256; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
257; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
258; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
259; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
260; GFX10-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
261; GFX10-NEXT:    s_waitcnt vmcnt(0)
262; GFX10-NEXT:    ; return to shader part epilog
263main_body:
264  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
265  ret <4 x float> %v
266}
267
; Depth-compare 1D sample with an LOD clamp, taking float %zcompare and half
; %s, %clamp. Both check sets expect v1 and v2 merged into v1, leaving v0
; untouched, so that image_sample_c_cl with a16 reads v[0:1].
268define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
269; GFX9-LABEL: sample_c_cl_1d:
270; GFX9:       ; %bb.0: ; %main_body
271; GFX9-NEXT:    s_mov_b64 s[12:13], exec
272; GFX9-NEXT:    s_wqm_b64 exec, exec
273; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
274; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
275; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
276; GFX9-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
277; GFX9-NEXT:    s_waitcnt vmcnt(0)
278; GFX9-NEXT:    ; return to shader part epilog
279;
280; GFX10-LABEL: sample_c_cl_1d:
281; GFX10:       ; %bb.0: ; %main_body
282; GFX10-NEXT:    s_mov_b32 s12, exec_lo
283; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
284; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
285; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
286; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
287; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
288; GFX10-NEXT:    s_waitcnt vmcnt(0)
289; GFX10-NEXT:    ; return to shader part epilog
290main_body:
291  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
292  ret <4 x float> %v
293}
294
; Depth-compare 2D sample with an LOD clamp, taking float %zcompare and half
; %s, %t, %clamp. The two check sets differ here. The gfx9 checks expect
; v_mov copies that build a contiguous v[3:5] address range, whereas the gfx10
; checks expect no copies and a bracketed register list [v0, v1, v3].
295define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
296; GFX9-LABEL: sample_c_cl_2d:
297; GFX9:       ; %bb.0: ; %main_body
298; GFX9-NEXT:    s_mov_b64 s[12:13], exec
299; GFX9-NEXT:    s_wqm_b64 exec, exec
300; GFX9-NEXT:    v_mov_b32_e32 v5, v3
301; GFX9-NEXT:    v_mov_b32_e32 v3, v0
302; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
303; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
304; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
305; GFX9-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
306; GFX9-NEXT:    s_waitcnt vmcnt(0)
307; GFX9-NEXT:    ; return to shader part epilog
308;
309; GFX10-LABEL: sample_c_cl_2d:
310; GFX10:       ; %bb.0: ; %main_body
311; GFX10-NEXT:    s_mov_b32 s12, exec_lo
312; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
313; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
314; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
315; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
316; GFX10-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
317; GFX10-NEXT:    s_waitcnt vmcnt(0)
318; GFX10-NEXT:    ; return to shader part epilog
319main_body:
320  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
321  ret <4 x float> %v
322}
323
; Biased 1D sample taking half %bias followed by half %s.
; Both check sets expect image_sample_b with a16 reading v[0:1] directly, so
; the two inputs are not merged into one register in the expected output.
324define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
325; GFX9-LABEL: sample_b_1d:
326; GFX9:       ; %bb.0: ; %main_body
327; GFX9-NEXT:    s_mov_b64 s[12:13], exec
328; GFX9-NEXT:    s_wqm_b64 exec, exec
329; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
330; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
331; GFX9-NEXT:    s_waitcnt vmcnt(0)
332; GFX9-NEXT:    ; return to shader part epilog
333;
334; GFX10-LABEL: sample_b_1d:
335; GFX10:       ; %bb.0: ; %main_body
336; GFX10-NEXT:    s_mov_b32 s12, exec_lo
337; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
338; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
339; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
340; GFX10-NEXT:    s_waitcnt vmcnt(0)
341; GFX10-NEXT:    ; return to shader part epilog
342main_body:
343  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
344  ret <4 x float> %v
345}
346
; Biased 2D sample taking half %bias and half %s, %t.
; Both check sets expect v1 and v2 merged into v1 while v0 is left untouched,
; so that image_sample_b with a16 reads v[0:1].
347define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
348; GFX9-LABEL: sample_b_2d:
349; GFX9:       ; %bb.0: ; %main_body
350; GFX9-NEXT:    s_mov_b64 s[12:13], exec
351; GFX9-NEXT:    s_wqm_b64 exec, exec
352; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
353; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
354; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
355; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
356; GFX9-NEXT:    s_waitcnt vmcnt(0)
357; GFX9-NEXT:    ; return to shader part epilog
358;
359; GFX10-LABEL: sample_b_2d:
360; GFX10:       ; %bb.0: ; %main_body
361; GFX10-NEXT:    s_mov_b32 s12, exec_lo
362; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
363; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
364; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
365; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
366; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
367; GFX10-NEXT:    s_waitcnt vmcnt(0)
368; GFX10-NEXT:    ; return to shader part epilog
369main_body:
370  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
371  ret <4 x float> %v
372}
373
; Biased depth-compare 1D sample taking half %bias, float %zcompare and half
; %s. Both check sets expect image_sample_c_b with a16 reading v[0:2]
; directly, with no packing instructions before it.
374define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
375; GFX9-LABEL: sample_c_b_1d:
376; GFX9:       ; %bb.0: ; %main_body
377; GFX9-NEXT:    s_mov_b64 s[12:13], exec
378; GFX9-NEXT:    s_wqm_b64 exec, exec
379; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
380; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
381; GFX9-NEXT:    s_waitcnt vmcnt(0)
382; GFX9-NEXT:    ; return to shader part epilog
383;
384; GFX10-LABEL: sample_c_b_1d:
385; GFX10:       ; %bb.0: ; %main_body
386; GFX10-NEXT:    s_mov_b32 s12, exec_lo
387; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
388; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
389; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
390; GFX10-NEXT:    s_waitcnt vmcnt(0)
391; GFX10-NEXT:    ; return to shader part epilog
392main_body:
393  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
394  ret <4 x float> %v
395}
396
; Biased depth-compare 2D sample taking half %bias, float %zcompare and half
; %s, %t. Both check sets expect v2 and v3 merged into v2 while v0 and v1 are
; left untouched, so that image_sample_c_b with a16 reads v[0:2].
397define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
398; GFX9-LABEL: sample_c_b_2d:
399; GFX9:       ; %bb.0: ; %main_body
400; GFX9-NEXT:    s_mov_b64 s[12:13], exec
401; GFX9-NEXT:    s_wqm_b64 exec, exec
402; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
403; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
404; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
405; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
406; GFX9-NEXT:    s_waitcnt vmcnt(0)
407; GFX9-NEXT:    ; return to shader part epilog
408;
409; GFX10-LABEL: sample_c_b_2d:
410; GFX10:       ; %bb.0: ; %main_body
411; GFX10-NEXT:    s_mov_b32 s12, exec_lo
412; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
413; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
414; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
415; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
416; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
417; GFX10-NEXT:    s_waitcnt vmcnt(0)
418; GFX10-NEXT:    ; return to shader part epilog
419main_body:
420  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
421  ret <4 x float> %v
422}
423
; Biased 1D sample with an LOD clamp, taking half %bias, %s and %clamp.
; Both check sets expect v1 and v2 merged into v1 while v0 is left untouched,
; so that image_sample_b_cl with a16 reads v[0:1].
424define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
425; GFX9-LABEL: sample_b_cl_1d:
426; GFX9:       ; %bb.0: ; %main_body
427; GFX9-NEXT:    s_mov_b64 s[12:13], exec
428; GFX9-NEXT:    s_wqm_b64 exec, exec
429; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
430; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
431; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
432; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
433; GFX9-NEXT:    s_waitcnt vmcnt(0)
434; GFX9-NEXT:    ; return to shader part epilog
435;
436; GFX10-LABEL: sample_b_cl_1d:
437; GFX10:       ; %bb.0: ; %main_body
438; GFX10-NEXT:    s_mov_b32 s12, exec_lo
439; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
440; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
441; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
442; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
443; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
444; GFX10-NEXT:    s_waitcnt vmcnt(0)
445; GFX10-NEXT:    ; return to shader part epilog
446main_body:
447  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
448  ret <4 x float> %v
449}
450
; Biased 2D sample with an LOD clamp, taking half %bias, %s, %t and %clamp.
; The two check sets differ here. The gfx9 checks expect v_mov copies that
; build a contiguous v[3:5] address range, whereas the gfx10 checks expect no
; copies and a bracketed register list [v0, v1, v3].
451define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
452; GFX9-LABEL: sample_b_cl_2d:
453; GFX9:       ; %bb.0: ; %main_body
454; GFX9-NEXT:    s_mov_b64 s[12:13], exec
455; GFX9-NEXT:    s_wqm_b64 exec, exec
456; GFX9-NEXT:    v_mov_b32_e32 v5, v3
457; GFX9-NEXT:    v_mov_b32_e32 v3, v0
458; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
459; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
460; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
461; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
462; GFX9-NEXT:    s_waitcnt vmcnt(0)
463; GFX9-NEXT:    ; return to shader part epilog
464;
465; GFX10-LABEL: sample_b_cl_2d:
466; GFX10:       ; %bb.0: ; %main_body
467; GFX10-NEXT:    s_mov_b32 s12, exec_lo
468; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
469; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
470; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
471; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
472; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
473; GFX10-NEXT:    s_waitcnt vmcnt(0)
474; GFX10-NEXT:    ; return to shader part epilog
475main_body:
476  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
477  ret <4 x float> %v
478}
479
; Biased depth-compare 1D sample with an LOD clamp, taking half %bias, float
; %zcompare and half %s, %clamp. Both check sets expect v2 and v3 merged into
; v2 so that image_sample_c_b_cl with a16 reads v[0:2].
480define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
481; GFX9-LABEL: sample_c_b_cl_1d:
482; GFX9:       ; %bb.0: ; %main_body
483; GFX9-NEXT:    s_mov_b64 s[12:13], exec
484; GFX9-NEXT:    s_wqm_b64 exec, exec
485; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
486; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
487; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
488; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
489; GFX9-NEXT:    s_waitcnt vmcnt(0)
490; GFX9-NEXT:    ; return to shader part epilog
491;
492; GFX10-LABEL: sample_c_b_cl_1d:
493; GFX10:       ; %bb.0: ; %main_body
494; GFX10-NEXT:    s_mov_b32 s12, exec_lo
495; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
496; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
497; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
498; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
499; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
500; GFX10-NEXT:    s_waitcnt vmcnt(0)
501; GFX10-NEXT:    ; return to shader part epilog
502main_body:
503  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
504  ret <4 x float> %v
505}
506
; Biased depth-compare 2D sample with an LOD clamp, taking half %bias, float
; %zcompare and half %s, %t, %clamp. The two check sets differ here. The gfx9
; checks expect v_mov copies that build a contiguous v[4:7] address range,
; whereas the gfx10 checks expect no copies and a bracketed register list
; [v0, v1, v2, v4].
507define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
508; GFX9-LABEL: sample_c_b_cl_2d:
509; GFX9:       ; %bb.0: ; %main_body
510; GFX9-NEXT:    s_mov_b64 s[12:13], exec
511; GFX9-NEXT:    s_wqm_b64 exec, exec
512; GFX9-NEXT:    v_mov_b32_e32 v7, v4
513; GFX9-NEXT:    v_mov_b32_e32 v4, v0
514; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
515; GFX9-NEXT:    v_mov_b32_e32 v5, v1
516; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
517; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
518; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
519; GFX9-NEXT:    s_waitcnt vmcnt(0)
520; GFX9-NEXT:    ; return to shader part epilog
521;
522; GFX10-LABEL: sample_c_b_cl_2d:
523; GFX10:       ; %bb.0: ; %main_body
524; GFX10-NEXT:    s_mov_b32 s12, exec_lo
525; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
526; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
527; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
528; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
529; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
530; GFX10-NEXT:    s_waitcnt vmcnt(0)
531; GFX10-NEXT:    ; return to shader part epilog
532main_body:
533  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
534  ret <4 x float> %v
535}
536
; 1D sample with explicit half derivatives %dsdh, %dsdv and a half %s.
; Unlike the non-derivative tests in this file, the expected output has no
; s_wqm / exec-restore sequence. The gfx9 checks expect image_sample_d with
; a16, while the gfx10 checks expect the image_sample_d_g16 mnemonic with a16.
; Both read v[0:2] with no packing instructions.
537define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
538; GFX9-LABEL: sample_d_1d:
539; GFX9:       ; %bb.0: ; %main_body
540; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
541; GFX9-NEXT:    s_waitcnt vmcnt(0)
542; GFX9-NEXT:    ; return to shader part epilog
543;
544; GFX10-LABEL: sample_d_1d:
545; GFX10:       ; %bb.0: ; %main_body
546; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
547; GFX10-NEXT:    s_waitcnt vmcnt(0)
548; GFX10-NEXT:    ; return to shader part epilog
549main_body:
550  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
551  ret <4 x float> %v
552}
553
; 2D sample with explicit half derivatives (%dsdh, %dtdh, %dsdv, %dtdv) and
; half coordinates %s, %t. Both check sets expect the six inputs in v0..v5 to
; be merged pairwise into v2, v3 and v4, giving a v[2:4] address range. The
; gfx9 checks expect image_sample_d and the gfx10 checks image_sample_d_g16.
554define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
555; GFX9-LABEL: sample_d_2d:
556; GFX9:       ; %bb.0: ; %main_body
557; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
558; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
559; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
560; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
561; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
562; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
563; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
564; GFX9-NEXT:    s_waitcnt vmcnt(0)
565; GFX9-NEXT:    ; return to shader part epilog
566;
567; GFX10-LABEL: sample_d_2d:
568; GFX10:       ; %bb.0: ; %main_body
569; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
570; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
571; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
572; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
573; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
574; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
575; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
576; GFX10-NEXT:    s_waitcnt vmcnt(0)
577; GFX10-NEXT:    ; return to shader part epilog
578main_body:
579  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
580  ret <4 x float> %v
581}
582
; 3D sample with explicit half derivatives (three per direction) and half
; coordinates %s, %t, %r, for nine half inputs in total. Both check sets
; expect a mix of v_mov copies and pairwise merges that produce a six-register
; v[7:12] address range. The instruction order differs slightly between the
; two check sets. The gfx9 checks expect image_sample_d and the gfx10 checks
; image_sample_d_g16.
583define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
584; GFX9-LABEL: sample_d_3d:
585; GFX9:       ; %bb.0: ; %main_body
586; GFX9-NEXT:    v_mov_b32_e32 v12, v8
587; GFX9-NEXT:    v_mov_b32_e32 v8, v2
588; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v6
589; GFX9-NEXT:    v_lshl_or_b32 v11, v7, 16, v2
590; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v3
591; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
592; GFX9-NEXT:    v_mov_b32_e32 v10, v5
593; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v2
594; GFX9-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
595; GFX9-NEXT:    image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
596; GFX9-NEXT:    s_waitcnt vmcnt(0)
597; GFX9-NEXT:    ; return to shader part epilog
598;
599; GFX10-LABEL: sample_d_3d:
600; GFX10:       ; %bb.0: ; %main_body
601; GFX10-NEXT:    v_mov_b32_e32 v12, v8
602; GFX10-NEXT:    v_mov_b32_e32 v8, v2
603; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v6
604; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
605; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
606; GFX10-NEXT:    v_mov_b32_e32 v10, v5
607; GFX10-NEXT:    v_lshl_or_b32 v11, v7, 16, v2
608; GFX10-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
609; GFX10-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
610; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
611; GFX10-NEXT:    s_waitcnt vmcnt(0)
612; GFX10-NEXT:    ; return to shader part epilog
613main_body:
614  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
615  ret <4 x float> %v
616}
617
; Depth-compare 1D sample with explicit half derivatives, taking float
; %zcompare then half %dsdh, %dsdv, %s. Both check sets expect the sample to
; read v[0:3] directly with no packing instructions. The gfx9 checks expect
; image_sample_c_d and the gfx10 checks image_sample_c_d_g16.
618define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
619; GFX9-LABEL: sample_c_d_1d:
620; GFX9:       ; %bb.0: ; %main_body
621; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
622; GFX9-NEXT:    s_waitcnt vmcnt(0)
623; GFX9-NEXT:    ; return to shader part epilog
624;
625; GFX10-LABEL: sample_c_d_1d:
626; GFX10:       ; %bb.0: ; %main_body
627; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
628; GFX10-NEXT:    s_waitcnt vmcnt(0)
629; GFX10-NEXT:    ; return to shader part epilog
630main_body:
631  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
632  ret <4 x float> %v
633}
634
; Depth-compare 2D sample with explicit half derivatives, taking float
; %zcompare, four half derivatives and half %s, %t. The two check sets differ
; here. The gfx9 checks expect extra v_mov copies so that the merged values
; land in a contiguous v[0:3] range, whereas the gfx10 checks expect in-place
; merges and a bracketed register list [v0, v1, v3, v5].
635define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
636; GFX9-LABEL: sample_c_d_2d:
637; GFX9:       ; %bb.0: ; %main_body
638; GFX9-NEXT:    v_mov_b32_e32 v7, v3
639; GFX9-NEXT:    v_mov_b32_e32 v8, v2
640; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v5
641; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
642; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v7
643; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
644; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
645; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
646; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
647; GFX9-NEXT:    s_waitcnt vmcnt(0)
648; GFX9-NEXT:    ; return to shader part epilog
649;
650; GFX10-LABEL: sample_c_d_2d:
651; GFX10:       ; %bb.0: ; %main_body
652; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
653; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
654; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
655; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
656; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
657; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
658; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
659; GFX10-NEXT:    s_waitcnt vmcnt(0)
660; GFX10-NEXT:    ; return to shader part epilog
661main_body:
662  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
663  ret <4 x float> %v
664}
665
; sample_d_cl_1d - calls llvm.amdgcn.image.sample.d.cl.1d with dmask 15 and
; four half operands (%dsdh, %dsdv, %s, %clamp).
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v2 and v3 into v2, then a sample instruction with address v[0:2]
; and a16: image_sample_d_cl in the gfx9 checks, image_sample_d_cl_g16 with
; dim:SQ_RSRC_IMG_1D in the gfx10 checks.
666define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
667; GFX9-LABEL: sample_d_cl_1d:
668; GFX9:       ; %bb.0: ; %main_body
669; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
670; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
671; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
672; GFX9-NEXT:    s_waitcnt vmcnt(0)
673; GFX9-NEXT:    ; return to shader part epilog
674;
675; GFX10-LABEL: sample_d_cl_1d:
676; GFX10:       ; %bb.0: ; %main_body
677; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
678; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
679; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
680; GFX10-NEXT:    s_waitcnt vmcnt(0)
681; GFX10-NEXT:    ; return to shader part epilog
682main_body:
683  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
684  ret <4 x float> %v
685}
686
; sample_d_cl_2d - calls llvm.amdgcn.image.sample.d.cl.2d with dmask 15 and
; seven half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; Both targets are expected to emit three v_and_b32 / v_lshl_or_b32 pairs.
; gfx9 checks: results land in v3..v5 and image_sample_d_cl uses the
; contiguous address v[3:6].
; gfx10 checks: results are written in place and image_sample_d_cl_g16 uses
; the register list [v0, v2, v4, v6] with dim:SQ_RSRC_IMG_2D.
687define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
688; GFX9-LABEL: sample_d_cl_2d:
689; GFX9:       ; %bb.0: ; %main_body
690; GFX9-NEXT:    v_and_b32_e32 v4, 0xffff, v4
691; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
692; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
693; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
694; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
695; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
696; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
697; GFX9-NEXT:    s_waitcnt vmcnt(0)
698; GFX9-NEXT:    ; return to shader part epilog
699;
700; GFX10-LABEL: sample_d_cl_2d:
701; GFX10:       ; %bb.0: ; %main_body
702; GFX10-NEXT:    v_and_b32_e32 v4, 0xffff, v4
703; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
704; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
705; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
706; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
707; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
708; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
709; GFX10-NEXT:    s_waitcnt vmcnt(0)
710; GFX10-NEXT:    ; return to shader part epilog
711main_body:
712  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
713  ret <4 x float> %v
714}
715
; sample_c_d_cl_1d - calls llvm.amdgcn.image.sample.c.d.cl.1d with dmask 15, a
; float %zcompare and four half operands (%dsdh, %dsdv, %s, %clamp).
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v3 and v4 into v3, then a sample instruction with address v[0:3]
; and a16: image_sample_c_d_cl in the gfx9 checks, image_sample_c_d_cl_g16
; with dim:SQ_RSRC_IMG_1D in the gfx10 checks.
; NOTE(review): intrinsic suffix is .v4f32.f32.f16 while the derivative
; operands are half - presumably relies on LLVM remangling; confirm.
716define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
717; GFX9-LABEL: sample_c_d_cl_1d:
718; GFX9:       ; %bb.0: ; %main_body
719; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
720; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
721; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
722; GFX9-NEXT:    s_waitcnt vmcnt(0)
723; GFX9-NEXT:    ; return to shader part epilog
724;
725; GFX10-LABEL: sample_c_d_cl_1d:
726; GFX10:       ; %bb.0: ; %main_body
727; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
728; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
729; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
730; GFX10-NEXT:    s_waitcnt vmcnt(0)
731; GFX10-NEXT:    ; return to shader part epilog
732main_body:
733  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
734  ret <4 x float> %v
735}
736
; sample_c_d_cl_2d - calls llvm.amdgcn.image.sample.c.d.cl.2d with dmask 15, a
; float %zcompare and seven half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t,
; %clamp).
; gfx9 checks: two v_mov_b32 copies plus three v_and_b32 / v_lshl_or_b32 pairs
; build the contiguous address v[7:11] for image_sample_c_d_cl.
; gfx10 checks: three pairs written in place, then image_sample_c_d_cl_g16
; with the register list [v0, v1, v3, v5, v7] and dim:SQ_RSRC_IMG_2D.
; NOTE(review): intrinsic suffix is .v4f32.f32.f16 while the derivative
; operands are half - presumably relies on LLVM remangling; confirm.
737define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
738; GFX9-LABEL: sample_c_d_cl_2d:
739; GFX9:       ; %bb.0: ; %main_body
740; GFX9-NEXT:    v_mov_b32_e32 v11, v7
741; GFX9-NEXT:    v_mov_b32_e32 v7, v0
742; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v5
743; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v0
744; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
745; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v0
746; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
747; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
748; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
749; GFX9-NEXT:    s_waitcnt vmcnt(0)
750; GFX9-NEXT:    ; return to shader part epilog
751;
752; GFX10-LABEL: sample_c_d_cl_2d:
753; GFX10:       ; %bb.0: ; %main_body
754; GFX10-NEXT:    v_and_b32_e32 v5, 0xffff, v5
755; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
756; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
757; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
758; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
759; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
760; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
761; GFX10-NEXT:    s_waitcnt vmcnt(0)
762; GFX10-NEXT:    ; return to shader part epilog
763main_body:
764  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
765  ret <4 x float> %v
766}
767
; sample_l_1d - calls llvm.amdgcn.image.sample.l.1d with dmask 15 and half
; operands %s and %lod.
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v0 and v1 into v0, then image_sample_l with the single address
; register v0 and a16 (the gfx10 checks add dim:SQ_RSRC_IMG_1D).
768define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
769; GFX9-LABEL: sample_l_1d:
770; GFX9:       ; %bb.0: ; %main_body
771; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
772; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
773; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
774; GFX9-NEXT:    s_waitcnt vmcnt(0)
775; GFX9-NEXT:    ; return to shader part epilog
776;
777; GFX10-LABEL: sample_l_1d:
778; GFX10:       ; %bb.0: ; %main_body
779; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
780; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
781; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
782; GFX10-NEXT:    s_waitcnt vmcnt(0)
783; GFX10-NEXT:    ; return to shader part epilog
784main_body:
785  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
786  ret <4 x float> %v
787}
788
; sample_l_2d - calls llvm.amdgcn.image.sample.l.2d with dmask 15 and half
; operands %s, %t and %lod.
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v0 and v1 into v1, then image_sample_l with address v[1:2] and a16
; (the gfx10 checks add dim:SQ_RSRC_IMG_2D).
789define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
790; GFX9-LABEL: sample_l_2d:
791; GFX9:       ; %bb.0: ; %main_body
792; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
793; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
794; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
795; GFX9-NEXT:    s_waitcnt vmcnt(0)
796; GFX9-NEXT:    ; return to shader part epilog
797;
798; GFX10-LABEL: sample_l_2d:
799; GFX10:       ; %bb.0: ; %main_body
800; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
801; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
802; GFX10-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
803; GFX10-NEXT:    s_waitcnt vmcnt(0)
804; GFX10-NEXT:    ; return to shader part epilog
805main_body:
806  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
807  ret <4 x float> %v
808}
809
; sample_c_l_1d - calls llvm.amdgcn.image.sample.c.l.1d with dmask 15, a float
; %zcompare and half operands %s and %lod.
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v1 and v2 into v1, then image_sample_c_l with address v[0:1] and
; a16 (the gfx10 checks add dim:SQ_RSRC_IMG_1D).
810define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
811; GFX9-LABEL: sample_c_l_1d:
812; GFX9:       ; %bb.0: ; %main_body
813; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
814; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
815; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
816; GFX9-NEXT:    s_waitcnt vmcnt(0)
817; GFX9-NEXT:    ; return to shader part epilog
818;
819; GFX10-LABEL: sample_c_l_1d:
820; GFX10:       ; %bb.0: ; %main_body
821; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
822; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
823; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
824; GFX10-NEXT:    s_waitcnt vmcnt(0)
825; GFX10-NEXT:    ; return to shader part epilog
826main_body:
827  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
828  ret <4 x float> %v
829}
830
; sample_c_l_2d - calls llvm.amdgcn.image.sample.c.l.2d with dmask 15, a float
; %zcompare and half operands %s, %t and %lod.
; gfx9 checks: two v_mov_b32 copies plus one v_and_b32 / v_lshl_or_b32 pair
; build the contiguous address v[3:5] for image_sample_c_l.
; gfx10 checks: one pair written in place (v1), then image_sample_c_l with the
; register list [v0, v1, v3] and dim:SQ_RSRC_IMG_2D.
831define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
832; GFX9-LABEL: sample_c_l_2d:
833; GFX9:       ; %bb.0: ; %main_body
834; GFX9-NEXT:    v_mov_b32_e32 v5, v3
835; GFX9-NEXT:    v_mov_b32_e32 v3, v0
836; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
837; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
838; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
839; GFX9-NEXT:    s_waitcnt vmcnt(0)
840; GFX9-NEXT:    ; return to shader part epilog
841;
842; GFX10-LABEL: sample_c_l_2d:
843; GFX10:       ; %bb.0: ; %main_body
844; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
845; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
846; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
847; GFX10-NEXT:    s_waitcnt vmcnt(0)
848; GFX10-NEXT:    ; return to shader part epilog
849main_body:
850  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
851  ret <4 x float> %v
852}
853
; sample_lz_1d - calls llvm.amdgcn.image.sample.lz.1d with dmask 15 and a
; single half operand %s.
; Both targets are expected to emit no packing instructions, only
; image_sample_lz with the single address register v0 and a16 (the gfx10
; checks add dim:SQ_RSRC_IMG_1D).
854define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
855; GFX9-LABEL: sample_lz_1d:
856; GFX9:       ; %bb.0: ; %main_body
857; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
858; GFX9-NEXT:    s_waitcnt vmcnt(0)
859; GFX9-NEXT:    ; return to shader part epilog
860;
861; GFX10-LABEL: sample_lz_1d:
862; GFX10:       ; %bb.0: ; %main_body
863; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
864; GFX10-NEXT:    s_waitcnt vmcnt(0)
865; GFX10-NEXT:    ; return to shader part epilog
866main_body:
867  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
868  ret <4 x float> %v
869}
870
; sample_lz_2d - calls llvm.amdgcn.image.sample.lz.2d with dmask 15 and half
; operands %s and %t.
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v0 and v1 into v0, then image_sample_lz with the single address
; register v0 and a16 (the gfx10 checks add dim:SQ_RSRC_IMG_2D).
871define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
872; GFX9-LABEL: sample_lz_2d:
873; GFX9:       ; %bb.0: ; %main_body
874; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
875; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
876; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
877; GFX9-NEXT:    s_waitcnt vmcnt(0)
878; GFX9-NEXT:    ; return to shader part epilog
879;
880; GFX10-LABEL: sample_lz_2d:
881; GFX10:       ; %bb.0: ; %main_body
882; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
883; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
884; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
885; GFX10-NEXT:    s_waitcnt vmcnt(0)
886; GFX10-NEXT:    ; return to shader part epilog
887main_body:
888  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
889  ret <4 x float> %v
890}
891
; sample_c_lz_1d - calls llvm.amdgcn.image.sample.c.lz.1d with dmask 15, a
; float %zcompare and a single half operand %s.
; Both targets are expected to emit no packing instructions, only
; image_sample_c_lz with address v[0:1] and a16 (the gfx10 checks add
; dim:SQ_RSRC_IMG_1D).
892define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
893; GFX9-LABEL: sample_c_lz_1d:
894; GFX9:       ; %bb.0: ; %main_body
895; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
896; GFX9-NEXT:    s_waitcnt vmcnt(0)
897; GFX9-NEXT:    ; return to shader part epilog
898;
899; GFX10-LABEL: sample_c_lz_1d:
900; GFX10:       ; %bb.0: ; %main_body
901; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
902; GFX10-NEXT:    s_waitcnt vmcnt(0)
903; GFX10-NEXT:    ; return to shader part epilog
904main_body:
905  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
906  ret <4 x float> %v
907}
908
; sample_c_lz_2d - calls llvm.amdgcn.image.sample.c.lz.2d with dmask 15, a
; float %zcompare and half operands %s and %t.
; Both targets are expected to emit one v_and_b32 / v_lshl_or_b32 pair that
; combines v1 and v2 into v1, then image_sample_c_lz with address v[0:1] and
; a16 (the gfx10 checks add dim:SQ_RSRC_IMG_2D).
909define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
910; GFX9-LABEL: sample_c_lz_2d:
911; GFX9:       ; %bb.0: ; %main_body
912; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
913; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
914; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
915; GFX9-NEXT:    s_waitcnt vmcnt(0)
916; GFX9-NEXT:    ; return to shader part epilog
917;
918; GFX10-LABEL: sample_c_lz_2d:
919; GFX10:       ; %bb.0: ; %main_body
920; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
921; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
922; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
923; GFX10-NEXT:    s_waitcnt vmcnt(0)
924; GFX10-NEXT:    ; return to shader part epilog
925main_body:
926  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
927  ret <4 x float> %v
928}
929
; sample_c_d_o_2darray_V1 - single-float-result variant: calls
; llvm.amdgcn.image.sample.c.d.o.2darray with dmask 4, an i32 %offset, a float
; %zcompare and seven half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t,
; %slice).
; Both targets are expected to emit three v_mov_b32 copies and three
; v_and_b32 / v_lshl_or_b32 pairs that build the contiguous address v[8:13],
; with the result in v0 and dmask:0x4.
; gfx9 checks: image_sample_c_d_o with "a16 da".
; gfx10 checks: image_sample_c_d_o_g16 with dim:SQ_RSRC_IMG_2D_ARRAY a16.
930define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
931; GFX9-LABEL: sample_c_d_o_2darray_V1:
932; GFX9:       ; %bb.0: ; %main_body
933; GFX9-NEXT:    v_mov_b32_e32 v13, v8
934; GFX9-NEXT:    v_mov_b32_e32 v8, v0
935; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v6
936; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
937; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v4
938; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v0
939; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
940; GFX9-NEXT:    v_mov_b32_e32 v9, v1
941; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
942; GFX9-NEXT:    image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
943; GFX9-NEXT:    s_waitcnt vmcnt(0)
944; GFX9-NEXT:    ; return to shader part epilog
945;
946; GFX10-LABEL: sample_c_d_o_2darray_V1:
947; GFX10:       ; %bb.0: ; %main_body
948; GFX10-NEXT:    v_mov_b32_e32 v13, v8
949; GFX10-NEXT:    v_mov_b32_e32 v9, v1
950; GFX10-NEXT:    v_mov_b32_e32 v8, v0
951; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v6
952; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
953; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
954; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
955; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
956; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v2
957; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
958; GFX10-NEXT:    s_waitcnt vmcnt(0)
959; GFX10-NEXT:    ; return to shader part epilog
960main_body:
961  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
962  ret float %v
963}
964
; sample_c_d_o_2darray_V2 - two-float-result variant: calls
; llvm.amdgcn.image.sample.c.d.o.2darray with dmask 6, an i32 %offset, a float
; %zcompare and seven half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t,
; %slice).
; Both targets are expected to emit three v_mov_b32 copies and three
; v_and_b32 / v_lshl_or_b32 pairs that build the contiguous address v[8:13],
; with the result in v[0:1] and dmask:0x6.
; gfx9 checks: image_sample_c_d_o with "a16 da".
; gfx10 checks: image_sample_c_d_o_g16 with dim:SQ_RSRC_IMG_2D_ARRAY a16.
; NOTE(review): the suffix here is .v2f32.f32.f16, whereas the single-float
; variant uses .f32.f16.f16 for the same operand types - presumably one of
; them relies on LLVM remangling; confirm which spelling is intended.
965define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
966; GFX9-LABEL: sample_c_d_o_2darray_V2:
967; GFX9:       ; %bb.0: ; %main_body
968; GFX9-NEXT:    v_mov_b32_e32 v13, v8
969; GFX9-NEXT:    v_mov_b32_e32 v8, v0
970; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v6
971; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
972; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v4
973; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v0
974; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
975; GFX9-NEXT:    v_mov_b32_e32 v9, v1
976; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
977; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
978; GFX9-NEXT:    s_waitcnt vmcnt(0)
979; GFX9-NEXT:    ; return to shader part epilog
980;
981; GFX10-LABEL: sample_c_d_o_2darray_V2:
982; GFX10:       ; %bb.0: ; %main_body
983; GFX10-NEXT:    v_mov_b32_e32 v13, v8
984; GFX10-NEXT:    v_mov_b32_e32 v9, v1
985; GFX10-NEXT:    v_mov_b32_e32 v8, v0
986; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v6
987; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
988; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
989; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v0
990; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
991; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v2
992; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
993; GFX10-NEXT:    s_waitcnt vmcnt(0)
994; GFX10-NEXT:    ; return to shader part epilog
995main_body:
996  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
997  ret <2 x float> %v
998}
999
; Declarations: plain image.sample intrinsics with half-typed coordinates, one
; per dimension (1d, 2d, 3d, cube, 1darray, 2darray). The 1d form is declared
; with both a <4 x float> and an <8 x float> result. All use attribute group #1.
1000declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1001declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1002declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1003declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1004declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1005declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1006declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1007
; Declarations: the c, cl and c.cl sample variants for 1d and 2d. The c.*
; forms take a float as their first operand after the dmask; every other
; address operand is half.
1008declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1009declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1010declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1011declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1012declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1013declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1014
; Declarations: the b, c.b, b.cl and c.b.cl sample variants for 1d and 2d, all
; with the .f16.f16 name suffix. In the c.b* forms the float operand is the
; second operand after the dmask (half, float, ...).
1015declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1016declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1017declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1018declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1019declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1020declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1021declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1022declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1023
; Declarations: the d, c.d, d.cl and c.d.cl sample variants (derivative
; operands followed by coordinates, all half; c.* forms add a leading float).
; NOTE(review): the d / d.cl names end in .f16.f16 while the c.d / c.d.cl
; names end in .f32.f16 even though their derivative operands are also half -
; presumably accepted via LLVM remangling of overloaded names; confirm.
1024declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1025declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1026declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1027declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1028declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1029declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1030declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1031declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1032declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1033
; Declarations: the l and c.l sample variants for 1d and 2d (half operands;
; the c.l forms add a leading float).
1034declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1035declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1036declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1037declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1038
; Declarations: the lz and c.lz sample variants for 1d and 2d (half
; coordinates; the c.lz forms add a leading float).
1039declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1040declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1041declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1042declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1043
; Declarations: sample.c.d.o.2darray with a float result and with a
; <2 x float> result; the operand lists are identical (i32, i32, float, then
; seven half operands).
; NOTE(review): the two names use different suffix patterns (.f32.f16.f16 vs
; .v2f32.f32.f16) for the same operand types - presumably one relies on LLVM
; remangling; confirm.
1044declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1045declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1046
; Attribute groups. #1 (nounwind readonly) is the group attached to the
; intrinsic declarations in this file. #0 and #2 are not referenced in this
; part of the file - TODO confirm whether they are used elsewhere.
1047attributes #0 = { nounwind }
1048attributes #1 = { nounwind readonly }
1049attributes #2 = { nounwind readnone }
1050