1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; sample_1d: calls llvm.amdgcn.image.sample.1d with a single half coordinate
; (%s) and dmask 15. The expected output for both gfx900 and gfx1010 is the
; exec save / s_wqm / s_and sequence followed by one image_sample carrying the
; a16 modifier; no coordinate-packing instructions are expected. The gfx1010
; form additionally carries dim:SQ_RSRC_IMG_1D.
5define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
6; GFX9-LABEL: sample_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b64 s[12:13], exec
9; GFX9-NEXT:    s_wqm_b64 exec, exec
10; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
11; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
12; GFX9-NEXT:    s_waitcnt vmcnt(0)
13; GFX9-NEXT:    ; return to shader part epilog
14;
15; GFX10-LABEL: sample_1d:
16; GFX10:       ; %bb.0: ; %main_body
17; GFX10-NEXT:    s_mov_b32 s12, exec_lo
18; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
19; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
20; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
21; GFX10-NEXT:    s_waitcnt vmcnt(0)
22; GFX10-NEXT:    ; return to shader part epilog
23main_body:
24  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
25  ret <4 x float> %v
26}
27
; sample_2d: calls llvm.amdgcn.image.sample.2d with two half coordinates
; (%s, %t). The expected output masks v0 to its low 16 bits and ORs in v1
; shifted left by 16 (v_and_b32 + v_lshl_or_b32), so the a16 image_sample
; takes a single address register, v0.
28define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
29; GFX9-LABEL: sample_2d:
30; GFX9:       ; %bb.0: ; %main_body
31; GFX9-NEXT:    s_mov_b64 s[12:13], exec
32; GFX9-NEXT:    s_wqm_b64 exec, exec
33; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
34; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
35; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
36; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
37; GFX9-NEXT:    s_waitcnt vmcnt(0)
38; GFX9-NEXT:    ; return to shader part epilog
39;
40; GFX10-LABEL: sample_2d:
41; GFX10:       ; %bb.0: ; %main_body
42; GFX10-NEXT:    s_mov_b32 s12, exec_lo
43; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
44; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
45; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
46; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
47; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
48; GFX10-NEXT:    s_waitcnt vmcnt(0)
49; GFX10-NEXT:    ; return to shader part epilog
50main_body:
51  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
52  ret <4 x float> %v
53}
54
; sample_3d: calls llvm.amdgcn.image.sample.3d with three half coordinates
; (%s, %t, %r). The expected output combines v0 and v1 into v1 and passes the
; two-register range v[1:2] as the address of the a16 image_sample.
; NOTE(review): v2 presumably still holds the unpacked %r - confirm against
; the amdgpu_ps argument assignment.
55define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
56; GFX9-LABEL: sample_3d:
57; GFX9:       ; %bb.0: ; %main_body
58; GFX9-NEXT:    s_mov_b64 s[12:13], exec
59; GFX9-NEXT:    s_wqm_b64 exec, exec
60; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
61; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
62; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
63; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
64; GFX9-NEXT:    s_waitcnt vmcnt(0)
65; GFX9-NEXT:    ; return to shader part epilog
66;
67; GFX10-LABEL: sample_3d:
68; GFX10:       ; %bb.0: ; %main_body
69; GFX10-NEXT:    s_mov_b32 s12, exec_lo
70; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
71; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
72; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
73; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
74; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
75; GFX10-NEXT:    s_waitcnt vmcnt(0)
76; GFX10-NEXT:    ; return to shader part epilog
77main_body:
78  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
79  ret <4 x float> %v
80}
81
; sample_cube: calls llvm.amdgcn.image.sample.cube with half %s, %t and %face.
; The expected packing matches the three-coordinate case (v0/v1 combined into
; v1, address v[1:2]). The gfx900 image_sample is expected to carry "a16 da",
; while the gfx1010 form carries dim:SQ_RSRC_IMG_CUBE and a16.
82define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
83; GFX9-LABEL: sample_cube:
84; GFX9:       ; %bb.0: ; %main_body
85; GFX9-NEXT:    s_mov_b64 s[12:13], exec
86; GFX9-NEXT:    s_wqm_b64 exec, exec
87; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
88; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
89; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
90; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
91; GFX9-NEXT:    s_waitcnt vmcnt(0)
92; GFX9-NEXT:    ; return to shader part epilog
93;
94; GFX10-LABEL: sample_cube:
95; GFX10:       ; %bb.0: ; %main_body
96; GFX10-NEXT:    s_mov_b32 s12, exec_lo
97; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
98; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
99; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
100; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
101; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
102; GFX10-NEXT:    s_waitcnt vmcnt(0)
103; GFX10-NEXT:    ; return to shader part epilog
104main_body:
105  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
106  ret <4 x float> %v
107}
108
; sample_1darray: calls llvm.amdgcn.image.sample.1darray with half %s and
; %slice. The expected output packs v0 and v1 into v0 and issues one a16
; image_sample with a single address register. gfx900 additionally expects
; the da modifier; gfx1010 expects dim:SQ_RSRC_IMG_1D_ARRAY.
109define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
110; GFX9-LABEL: sample_1darray:
111; GFX9:       ; %bb.0: ; %main_body
112; GFX9-NEXT:    s_mov_b64 s[12:13], exec
113; GFX9-NEXT:    s_wqm_b64 exec, exec
114; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
115; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
116; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
117; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
118; GFX9-NEXT:    s_waitcnt vmcnt(0)
119; GFX9-NEXT:    ; return to shader part epilog
120;
121; GFX10-LABEL: sample_1darray:
122; GFX10:       ; %bb.0: ; %main_body
123; GFX10-NEXT:    s_mov_b32 s12, exec_lo
124; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
125; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
126; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
127; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
128; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
129; GFX10-NEXT:    s_waitcnt vmcnt(0)
130; GFX10-NEXT:    ; return to shader part epilog
131main_body:
132  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
133  ret <4 x float> %v
134}
135
; sample_2darray: calls llvm.amdgcn.image.sample.2darray with half %s, %t and
; %slice. The expected output combines v0 and v1 into v1 and uses v[1:2] as
; the address of the a16 image_sample. gfx900 additionally expects the da
; modifier; gfx1010 expects dim:SQ_RSRC_IMG_2D_ARRAY.
136define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
137; GFX9-LABEL: sample_2darray:
138; GFX9:       ; %bb.0: ; %main_body
139; GFX9-NEXT:    s_mov_b64 s[12:13], exec
140; GFX9-NEXT:    s_wqm_b64 exec, exec
141; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
142; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
143; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
144; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
145; GFX9-NEXT:    s_waitcnt vmcnt(0)
146; GFX9-NEXT:    ; return to shader part epilog
147;
148; GFX10-LABEL: sample_2darray:
149; GFX10:       ; %bb.0: ; %main_body
150; GFX10-NEXT:    s_mov_b32 s12, exec_lo
151; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
152; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
153; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
154; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
155; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
156; GFX10-NEXT:    s_waitcnt vmcnt(0)
157; GFX10-NEXT:    ; return to shader part epilog
158main_body:
159  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
160  ret <4 x float> %v
161}
162
; sample_c_1d: calls llvm.amdgcn.image.sample.c.1d with a float %zcompare and
; one half coordinate %s. No packing instructions are expected; the a16
; image_sample_c takes the two-register address v[0:1] directly.
163define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
164; GFX9-LABEL: sample_c_1d:
165; GFX9:       ; %bb.0: ; %main_body
166; GFX9-NEXT:    s_mov_b64 s[12:13], exec
167; GFX9-NEXT:    s_wqm_b64 exec, exec
168; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
169; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
170; GFX9-NEXT:    s_waitcnt vmcnt(0)
171; GFX9-NEXT:    ; return to shader part epilog
172;
173; GFX10-LABEL: sample_c_1d:
174; GFX10:       ; %bb.0: ; %main_body
175; GFX10-NEXT:    s_mov_b32 s12, exec_lo
176; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
177; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
178; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
179; GFX10-NEXT:    s_waitcnt vmcnt(0)
180; GFX10-NEXT:    ; return to shader part epilog
181main_body:
182  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
183  ret <4 x float> %v
184}
185
; sample_c_2d: calls llvm.amdgcn.image.sample.c.2d with a float %zcompare and
; half %s, %t. The expected output packs v1 and v2 into v1 (v0 is left
; untouched) and issues an a16 image_sample_c with address v[0:1].
186define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
187; GFX9-LABEL: sample_c_2d:
188; GFX9:       ; %bb.0: ; %main_body
189; GFX9-NEXT:    s_mov_b64 s[12:13], exec
190; GFX9-NEXT:    s_wqm_b64 exec, exec
191; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
192; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
193; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
194; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
195; GFX9-NEXT:    s_waitcnt vmcnt(0)
196; GFX9-NEXT:    ; return to shader part epilog
197;
198; GFX10-LABEL: sample_c_2d:
199; GFX10:       ; %bb.0: ; %main_body
200; GFX10-NEXT:    s_mov_b32 s12, exec_lo
201; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
202; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
203; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
204; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
205; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
206; GFX10-NEXT:    s_waitcnt vmcnt(0)
207; GFX10-NEXT:    ; return to shader part epilog
208main_body:
209  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
210  ret <4 x float> %v
211}
212
; sample_cl_1d: calls llvm.amdgcn.image.sample.cl.1d with half %s and %clamp.
; The expected output packs v0 and v1 into v0 and issues an a16
; image_sample_cl with that single address register.
213define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
214; GFX9-LABEL: sample_cl_1d:
215; GFX9:       ; %bb.0: ; %main_body
216; GFX9-NEXT:    s_mov_b64 s[12:13], exec
217; GFX9-NEXT:    s_wqm_b64 exec, exec
218; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
219; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
220; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
221; GFX9-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
222; GFX9-NEXT:    s_waitcnt vmcnt(0)
223; GFX9-NEXT:    ; return to shader part epilog
224;
225; GFX10-LABEL: sample_cl_1d:
226; GFX10:       ; %bb.0: ; %main_body
227; GFX10-NEXT:    s_mov_b32 s12, exec_lo
228; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
229; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
230; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
231; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
232; GFX10-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
233; GFX10-NEXT:    s_waitcnt vmcnt(0)
234; GFX10-NEXT:    ; return to shader part epilog
235main_body:
236  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
237  ret <4 x float> %v
238}
239
; sample_cl_2d: calls llvm.amdgcn.image.sample.cl.2d with half %s, %t and
; %clamp. The expected output combines v0 and v1 into v1 and issues an a16
; image_sample_cl with the two-register address v[1:2].
240define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
241; GFX9-LABEL: sample_cl_2d:
242; GFX9:       ; %bb.0: ; %main_body
243; GFX9-NEXT:    s_mov_b64 s[12:13], exec
244; GFX9-NEXT:    s_wqm_b64 exec, exec
245; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
246; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
247; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
248; GFX9-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
249; GFX9-NEXT:    s_waitcnt vmcnt(0)
250; GFX9-NEXT:    ; return to shader part epilog
251;
252; GFX10-LABEL: sample_cl_2d:
253; GFX10:       ; %bb.0: ; %main_body
254; GFX10-NEXT:    s_mov_b32 s12, exec_lo
255; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
256; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
257; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
258; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
259; GFX10-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
260; GFX10-NEXT:    s_waitcnt vmcnt(0)
261; GFX10-NEXT:    ; return to shader part epilog
262main_body:
263  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
264  ret <4 x float> %v
265}
266
; sample_c_cl_1d: calls llvm.amdgcn.image.sample.c.cl.1d with a float
; %zcompare and half %s, %clamp. The expected output packs v1 and v2 into v1
; and issues an a16 image_sample_c_cl with address v[0:1].
267define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
268; GFX9-LABEL: sample_c_cl_1d:
269; GFX9:       ; %bb.0: ; %main_body
270; GFX9-NEXT:    s_mov_b64 s[12:13], exec
271; GFX9-NEXT:    s_wqm_b64 exec, exec
272; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
273; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
274; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
275; GFX9-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
276; GFX9-NEXT:    s_waitcnt vmcnt(0)
277; GFX9-NEXT:    ; return to shader part epilog
278;
279; GFX10-LABEL: sample_c_cl_1d:
280; GFX10:       ; %bb.0: ; %main_body
281; GFX10-NEXT:    s_mov_b32 s12, exec_lo
282; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
283; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
284; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
285; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
286; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
287; GFX10-NEXT:    s_waitcnt vmcnt(0)
288; GFX10-NEXT:    ; return to shader part epilog
289main_body:
290  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
291  ret <4 x float> %v
292}
293
; sample_c_cl_2d: calls llvm.amdgcn.image.sample.c.cl.2d with a float
; %zcompare and half %s, %t, %clamp. The two targets are expected to differ
; in how the address is formed: gfx900 moves values so the address is the
; contiguous range v[3:5], whereas gfx1010 packs in place and passes the
; bracketed register list [v0, v1, v3] with no extra moves.
294define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
295; GFX9-LABEL: sample_c_cl_2d:
296; GFX9:       ; %bb.0: ; %main_body
297; GFX9-NEXT:    s_mov_b64 s[12:13], exec
298; GFX9-NEXT:    s_wqm_b64 exec, exec
299; GFX9-NEXT:    v_mov_b32_e32 v5, v3
300; GFX9-NEXT:    v_mov_b32_e32 v3, v0
301; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
302; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
303; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
304; GFX9-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
305; GFX9-NEXT:    s_waitcnt vmcnt(0)
306; GFX9-NEXT:    ; return to shader part epilog
307;
308; GFX10-LABEL: sample_c_cl_2d:
309; GFX10:       ; %bb.0: ; %main_body
310; GFX10-NEXT:    s_mov_b32 s12, exec_lo
311; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
312; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
313; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
314; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
315; GFX10-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
316; GFX10-NEXT:    s_waitcnt vmcnt(0)
317; GFX10-NEXT:    ; return to shader part epilog
318main_body:
319  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
320  ret <4 x float> %v
321}
322
; sample_b_1d: calls llvm.amdgcn.image.sample.b.1d with a half %bias and one
; half coordinate %s. No packing instructions are expected; the a16
; image_sample_b takes address v[0:1], so the two half values are not
; combined into one register.
323define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
324; GFX9-LABEL: sample_b_1d:
325; GFX9:       ; %bb.0: ; %main_body
326; GFX9-NEXT:    s_mov_b64 s[12:13], exec
327; GFX9-NEXT:    s_wqm_b64 exec, exec
328; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
329; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
330; GFX9-NEXT:    s_waitcnt vmcnt(0)
331; GFX9-NEXT:    ; return to shader part epilog
332;
333; GFX10-LABEL: sample_b_1d:
334; GFX10:       ; %bb.0: ; %main_body
335; GFX10-NEXT:    s_mov_b32 s12, exec_lo
336; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
337; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
338; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
339; GFX10-NEXT:    s_waitcnt vmcnt(0)
340; GFX10-NEXT:    ; return to shader part epilog
341main_body:
342  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
343  ret <4 x float> %v
344}
345
; sample_b_2d: calls llvm.amdgcn.image.sample.b.2d with half %bias, %s and %t.
; The expected output packs v1 and v2 into v1 (v0 is left untouched) and
; issues an a16 image_sample_b with address v[0:1].
346define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
347; GFX9-LABEL: sample_b_2d:
348; GFX9:       ; %bb.0: ; %main_body
349; GFX9-NEXT:    s_mov_b64 s[12:13], exec
350; GFX9-NEXT:    s_wqm_b64 exec, exec
351; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
352; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
353; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
354; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
355; GFX9-NEXT:    s_waitcnt vmcnt(0)
356; GFX9-NEXT:    ; return to shader part epilog
357;
358; GFX10-LABEL: sample_b_2d:
359; GFX10:       ; %bb.0: ; %main_body
360; GFX10-NEXT:    s_mov_b32 s12, exec_lo
361; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
362; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
363; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
364; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
365; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
366; GFX10-NEXT:    s_waitcnt vmcnt(0)
367; GFX10-NEXT:    ; return to shader part epilog
368main_body:
369  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
370  ret <4 x float> %v
371}
372
; sample_c_b_1d: calls llvm.amdgcn.image.sample.c.b.1d with half %bias, float
; %zcompare and half %s. No packing instructions are expected; the a16
; image_sample_c_b takes the three-register address v[0:2] directly.
373define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
374; GFX9-LABEL: sample_c_b_1d:
375; GFX9:       ; %bb.0: ; %main_body
376; GFX9-NEXT:    s_mov_b64 s[12:13], exec
377; GFX9-NEXT:    s_wqm_b64 exec, exec
378; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
379; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
380; GFX9-NEXT:    s_waitcnt vmcnt(0)
381; GFX9-NEXT:    ; return to shader part epilog
382;
383; GFX10-LABEL: sample_c_b_1d:
384; GFX10:       ; %bb.0: ; %main_body
385; GFX10-NEXT:    s_mov_b32 s12, exec_lo
386; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
387; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
388; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
389; GFX10-NEXT:    s_waitcnt vmcnt(0)
390; GFX10-NEXT:    ; return to shader part epilog
391main_body:
392  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
393  ret <4 x float> %v
394}
395
; sample_c_b_2d: calls llvm.amdgcn.image.sample.c.b.2d with half %bias, float
; %zcompare and half %s, %t. The expected output packs v2 and v3 into v2 and
; issues an a16 image_sample_c_b with address v[0:2].
396define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
397; GFX9-LABEL: sample_c_b_2d:
398; GFX9:       ; %bb.0: ; %main_body
399; GFX9-NEXT:    s_mov_b64 s[12:13], exec
400; GFX9-NEXT:    s_wqm_b64 exec, exec
401; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
402; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
403; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
404; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
405; GFX9-NEXT:    s_waitcnt vmcnt(0)
406; GFX9-NEXT:    ; return to shader part epilog
407;
408; GFX10-LABEL: sample_c_b_2d:
409; GFX10:       ; %bb.0: ; %main_body
410; GFX10-NEXT:    s_mov_b32 s12, exec_lo
411; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
412; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
413; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
414; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
415; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
416; GFX10-NEXT:    s_waitcnt vmcnt(0)
417; GFX10-NEXT:    ; return to shader part epilog
418main_body:
419  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
420  ret <4 x float> %v
421}
422
; sample_b_cl_1d: calls llvm.amdgcn.image.sample.b.cl.1d with half %bias, %s
; and %clamp. The expected output packs v1 and v2 into v1 (v0 is left
; untouched) and issues an a16 image_sample_b_cl with address v[0:1].
423define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
424; GFX9-LABEL: sample_b_cl_1d:
425; GFX9:       ; %bb.0: ; %main_body
426; GFX9-NEXT:    s_mov_b64 s[12:13], exec
427; GFX9-NEXT:    s_wqm_b64 exec, exec
428; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
429; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
430; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
431; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
432; GFX9-NEXT:    s_waitcnt vmcnt(0)
433; GFX9-NEXT:    ; return to shader part epilog
434;
435; GFX10-LABEL: sample_b_cl_1d:
436; GFX10:       ; %bb.0: ; %main_body
437; GFX10-NEXT:    s_mov_b32 s12, exec_lo
438; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
439; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
440; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
441; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
442; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
443; GFX10-NEXT:    s_waitcnt vmcnt(0)
444; GFX10-NEXT:    ; return to shader part epilog
445main_body:
446  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
447  ret <4 x float> %v
448}
449
; sample_b_cl_2d: calls llvm.amdgcn.image.sample.b.cl.2d with half %bias, %s,
; %t and %clamp. The two targets are expected to differ in how the address is
; formed: gfx900 moves values so the address is the contiguous range v[3:5],
; whereas gfx1010 packs in place and passes the bracketed register list
; [v0, v1, v3] with no extra moves.
450define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
451; GFX9-LABEL: sample_b_cl_2d:
452; GFX9:       ; %bb.0: ; %main_body
453; GFX9-NEXT:    s_mov_b64 s[12:13], exec
454; GFX9-NEXT:    s_wqm_b64 exec, exec
455; GFX9-NEXT:    v_mov_b32_e32 v5, v3
456; GFX9-NEXT:    v_mov_b32_e32 v3, v0
457; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
458; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
459; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
460; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
461; GFX9-NEXT:    s_waitcnt vmcnt(0)
462; GFX9-NEXT:    ; return to shader part epilog
463;
464; GFX10-LABEL: sample_b_cl_2d:
465; GFX10:       ; %bb.0: ; %main_body
466; GFX10-NEXT:    s_mov_b32 s12, exec_lo
467; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
468; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
469; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
470; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
471; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
472; GFX10-NEXT:    s_waitcnt vmcnt(0)
473; GFX10-NEXT:    ; return to shader part epilog
474main_body:
475  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
476  ret <4 x float> %v
477}
478
; sample_c_b_cl_1d: calls llvm.amdgcn.image.sample.c.b.cl.1d with half %bias,
; float %zcompare and half %s, %clamp. The expected output packs v2 and v3
; into v2 and issues an a16 image_sample_c_b_cl with address v[0:2].
479define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
480; GFX9-LABEL: sample_c_b_cl_1d:
481; GFX9:       ; %bb.0: ; %main_body
482; GFX9-NEXT:    s_mov_b64 s[12:13], exec
483; GFX9-NEXT:    s_wqm_b64 exec, exec
484; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
485; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
486; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
487; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
488; GFX9-NEXT:    s_waitcnt vmcnt(0)
489; GFX9-NEXT:    ; return to shader part epilog
490;
491; GFX10-LABEL: sample_c_b_cl_1d:
492; GFX10:       ; %bb.0: ; %main_body
493; GFX10-NEXT:    s_mov_b32 s12, exec_lo
494; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
495; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
496; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
497; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
498; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
499; GFX10-NEXT:    s_waitcnt vmcnt(0)
500; GFX10-NEXT:    ; return to shader part epilog
501main_body:
502  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
503  ret <4 x float> %v
504}
505
; sample_c_b_cl_2d: calls llvm.amdgcn.image.sample.c.b.cl.2d with half %bias,
; float %zcompare and half %s, %t, %clamp. The two targets are expected to
; differ in how the address is formed: gfx900 moves values so the address is
; the contiguous range v[4:7], whereas gfx1010 packs v2/v3 in place and
; passes the bracketed register list [v0, v1, v2, v4] with no extra moves.
506define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
507; GFX9-LABEL: sample_c_b_cl_2d:
508; GFX9:       ; %bb.0: ; %main_body
509; GFX9-NEXT:    s_mov_b64 s[12:13], exec
510; GFX9-NEXT:    s_wqm_b64 exec, exec
511; GFX9-NEXT:    v_mov_b32_e32 v7, v4
512; GFX9-NEXT:    v_mov_b32_e32 v4, v0
513; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
514; GFX9-NEXT:    v_mov_b32_e32 v5, v1
515; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
516; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
517; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
518; GFX9-NEXT:    s_waitcnt vmcnt(0)
519; GFX9-NEXT:    ; return to shader part epilog
520;
521; GFX10-LABEL: sample_c_b_cl_2d:
522; GFX10:       ; %bb.0: ; %main_body
523; GFX10-NEXT:    s_mov_b32 s12, exec_lo
524; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
525; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
526; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
527; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
528; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
529; GFX10-NEXT:    s_waitcnt vmcnt(0)
530; GFX10-NEXT:    ; return to shader part epilog
531main_body:
532  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
533  ret <4 x float> %v
534}
535
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with half derivatives
; (%dsdh, %dsdv) and a half coordinate %s. Unlike the non-derivative tests in
; this file, the expected output has no exec save / s_wqm / s_and sequence and
; no packing instructions. gfx900 expects image_sample_d, gfx1010 expects
; image_sample_d_g16; both take address v[0:2] and carry a16.
536define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
537; GFX9-LABEL: sample_d_1d:
538; GFX9:       ; %bb.0: ; %main_body
539; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
540; GFX9-NEXT:    s_waitcnt vmcnt(0)
541; GFX9-NEXT:    ; return to shader part epilog
542;
543; GFX10-LABEL: sample_d_1d:
544; GFX10:       ; %bb.0: ; %main_body
545; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
546; GFX10-NEXT:    s_waitcnt vmcnt(0)
547; GFX10-NEXT:    ; return to shader part epilog
548main_body:
549  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
550  ret <4 x float> %v
551}
552
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with four half derivatives
; and half %s, %t. The expected output materialises the 0xffff mask once in
; v6, then packs the six inputs pairwise (v0/v1, v2/v3, v4/v5) into v2, v3
; and v4, which form the address v[2:4]. gfx900 expects image_sample_d,
; gfx1010 expects image_sample_d_g16.
553define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
554; GFX9-LABEL: sample_d_2d:
555; GFX9:       ; %bb.0: ; %main_body
556; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
557; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
558; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
559; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
560; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
561; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
562; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
563; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
564; GFX9-NEXT:    s_waitcnt vmcnt(0)
565; GFX9-NEXT:    ; return to shader part epilog
566;
567; GFX10-LABEL: sample_d_2d:
568; GFX10:       ; %bb.0: ; %main_body
569; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
570; GFX10-NEXT:    v_and_b32_e32 v4, v6, v4
571; GFX10-NEXT:    v_and_b32_e32 v2, v6, v2
572; GFX10-NEXT:    v_and_b32_e32 v0, v6, v0
573; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
574; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
575; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
576; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
577; GFX10-NEXT:    s_waitcnt vmcnt(0)
578; GFX10-NEXT:    ; return to shader part epilog
579main_body:
580  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
581  ret <4 x float> %v
582}
583
; sample_d_3d: calls llvm.amdgcn.image.sample.d.3d with six half derivatives
; and half %s, %t, %r. The expected output packs three pairs (v0/v1, v3/v4,
; v6/v7) into v7, v9 and v11 and moves v2, v5 and v8 into v8, v10 and v12,
; giving the six-register address v[7:12]. gfx900 expects image_sample_d,
; gfx1010 expects image_sample_d_g16.
584define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
585; GFX9-LABEL: sample_d_3d:
586; GFX9:       ; %bb.0: ; %main_body
587; GFX9-NEXT:    v_mov_b32_e32 v12, v8
588; GFX9-NEXT:    v_mov_b32_e32 v8, v2
589; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
590; GFX9-NEXT:    v_mov_b32_e32 v10, v5
591; GFX9-NEXT:    v_and_b32_e32 v5, v2, v6
592; GFX9-NEXT:    v_and_b32_e32 v3, v2, v3
593; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
594; GFX9-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
595; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
596; GFX9-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
597; GFX9-NEXT:    image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
598; GFX9-NEXT:    s_waitcnt vmcnt(0)
599; GFX9-NEXT:    ; return to shader part epilog
600;
601; GFX10-LABEL: sample_d_3d:
602; GFX10:       ; %bb.0: ; %main_body
603; GFX10-NEXT:    v_mov_b32_e32 v12, v8
604; GFX10-NEXT:    v_mov_b32_e32 v8, v2
605; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffff
606; GFX10-NEXT:    v_mov_b32_e32 v10, v5
607; GFX10-NEXT:    v_and_b32_e32 v5, v2, v6
608; GFX10-NEXT:    v_and_b32_e32 v3, v2, v3
609; GFX10-NEXT:    v_and_b32_e32 v0, v2, v0
610; GFX10-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
611; GFX10-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
612; GFX10-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
613; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
614; GFX10-NEXT:    s_waitcnt vmcnt(0)
615; GFX10-NEXT:    ; return to shader part epilog
616main_body:
617  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
618  ret <4 x float> %v
619}
620
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with float %zcompare,
; half derivatives (%dsdh, %dsdv) and half %s. The expected output is a
; single sample instruction with address v[0:3], no packing and no exec
; save / s_wqm / s_and sequence. gfx900 expects image_sample_c_d, gfx1010
; expects image_sample_c_d_g16.
621define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
622; GFX9-LABEL: sample_c_d_1d:
623; GFX9:       ; %bb.0: ; %main_body
624; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
625; GFX9-NEXT:    s_waitcnt vmcnt(0)
626; GFX9-NEXT:    ; return to shader part epilog
627;
628; GFX10-LABEL: sample_c_d_1d:
629; GFX10:       ; %bb.0: ; %main_body
630; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
631; GFX10-NEXT:    s_waitcnt vmcnt(0)
632; GFX10-NEXT:    ; return to shader part epilog
633main_body:
634  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
635  ret <4 x float> %v
636}
637
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with float %zcompare,
; four half derivatives and half %s, %t. The two targets are expected to
; differ in how the address is formed: gfx900 uses extra moves so the packed
; pairs land in the contiguous range v[0:3], whereas gfx1010 packs pairs in
; place into v1, v3 and v5 and passes the bracketed register list
; [v0, v1, v3, v5].
638define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
639; GFX9-LABEL: sample_c_d_2d:
640; GFX9:       ; %bb.0: ; %main_body
641; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
642; GFX9-NEXT:    v_mov_b32_e32 v7, v3
643; GFX9-NEXT:    v_mov_b32_e32 v8, v2
644; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
645; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
646; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
647; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
648; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
649; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
650; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
651; GFX9-NEXT:    s_waitcnt vmcnt(0)
652; GFX9-NEXT:    ; return to shader part epilog
653;
654; GFX10-LABEL: sample_c_d_2d:
655; GFX10:       ; %bb.0: ; %main_body
656; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
657; GFX10-NEXT:    v_and_b32_e32 v5, v7, v5
658; GFX10-NEXT:    v_and_b32_e32 v3, v7, v3
659; GFX10-NEXT:    v_and_b32_e32 v1, v7, v1
660; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
661; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
662; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
663; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
664; GFX10-NEXT:    s_waitcnt vmcnt(0)
665; GFX10-NEXT:    ; return to shader part epilog
666main_body:
667  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
668  ret <4 x float> %v
669}
670
; sample.d.cl on a 1D image with the half operands %dsdh, %dsdv, %s and %clamp.
; Only v2 and v3 are expected to be packed into one register (presumably %s
; and %clamp, the third and fourth VGPR arguments); the address operand is
; v[0:2] on both targets.
671define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
672; GFX9-LABEL: sample_d_cl_1d:
673; GFX9:       ; %bb.0: ; %main_body
674; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
675; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
676; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
677; GFX9-NEXT:    s_waitcnt vmcnt(0)
678; GFX9-NEXT:    ; return to shader part epilog
679;
680; GFX10-LABEL: sample_d_cl_1d:
681; GFX10:       ; %bb.0: ; %main_body
682; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
683; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
684; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
685; GFX10-NEXT:    s_waitcnt vmcnt(0)
686; GFX10-NEXT:    ; return to shader part epilog
687main_body:
688  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
689  ret <4 x float> %v
690}
691
; sample.d.cl on a 2D image with seven half operands.
; Three register pairs (v0/v1, v2/v3, v4/v5) are expected to be packed while v6
; is used unpacked. gfx900 builds the contiguous range v[3:6]; gfx1010 packs in
; place and passes the register list [v0, v2, v4, v6].
692define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
693; GFX9-LABEL: sample_d_cl_2d:
694; GFX9:       ; %bb.0: ; %main_body
695; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
696; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
697; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
698; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
699; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
700; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
701; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
702; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
703; GFX9-NEXT:    s_waitcnt vmcnt(0)
704; GFX9-NEXT:    ; return to shader part epilog
705;
706; GFX10-LABEL: sample_d_cl_2d:
707; GFX10:       ; %bb.0: ; %main_body
708; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
709; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
710; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
711; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
712; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
713; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
714; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
715; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
716; GFX10-NEXT:    s_waitcnt vmcnt(0)
717; GFX10-NEXT:    ; return to shader part epilog
718main_body:
719  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
720  ret <4 x float> %v
721}
722
; sample.c.d.cl on a 1D image: an f32 %zcompare followed by the half operands
; %dsdh, %dsdv, %s and %clamp. Only v3 and v4 are expected to be packed into
; one register; the address operand is v[0:3] on both targets.
723define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
724; GFX9-LABEL: sample_c_d_cl_1d:
725; GFX9:       ; %bb.0: ; %main_body
726; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
727; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
728; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
729; GFX9-NEXT:    s_waitcnt vmcnt(0)
730; GFX9-NEXT:    ; return to shader part epilog
731;
732; GFX10-LABEL: sample_c_d_cl_1d:
733; GFX10:       ; %bb.0: ; %main_body
734; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
735; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
736; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
737; GFX10-NEXT:    s_waitcnt vmcnt(0)
738; GFX10-NEXT:    ; return to shader part epilog
739main_body:
740  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
741  ret <4 x float> %v
742}
743
; sample.c.d.cl on a 2D image: an f32 %zcompare followed by seven half operands.
; gfx900 is expected to copy v0 and v7 and pack three pairs so that the address
; forms the contiguous range v[7:11]; gfx1010 packs in place and passes the
; register list [v0, v1, v3, v5, v7].
744define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
745; GFX9-LABEL: sample_c_d_cl_2d:
746; GFX9:       ; %bb.0: ; %main_body
747; GFX9-NEXT:    v_mov_b32_e32 v11, v7
748; GFX9-NEXT:    v_mov_b32_e32 v7, v0
749; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
750; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
751; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
752; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
753; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
754; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
755; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
756; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
757; GFX9-NEXT:    s_waitcnt vmcnt(0)
758; GFX9-NEXT:    ; return to shader part epilog
759;
760; GFX10-LABEL: sample_c_d_cl_2d:
761; GFX10:       ; %bb.0: ; %main_body
762; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
763; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
764; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
765; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
766; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
767; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
768; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
769; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
770; GFX10-NEXT:    s_waitcnt vmcnt(0)
771; GFX10-NEXT:    ; return to shader part epilog
772main_body:
773  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
774  ret <4 x float> %v
775}
776
; sample.cd on a 1D image with the half operands %dsdh, %dsdv and %s.
; No packing instructions are expected; the address operand is v[0:2] with the
; a16 modifier (image_sample_cd on gfx900, image_sample_cd_g16 on gfx1010).
777define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
778; GFX9-LABEL: sample_cd_1d:
779; GFX9:       ; %bb.0: ; %main_body
780; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
781; GFX9-NEXT:    s_waitcnt vmcnt(0)
782; GFX9-NEXT:    ; return to shader part epilog
783;
784; GFX10-LABEL: sample_cd_1d:
785; GFX10:       ; %bb.0: ; %main_body
786; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
787; GFX10-NEXT:    s_waitcnt vmcnt(0)
788; GFX10-NEXT:    ; return to shader part epilog
789main_body:
790  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
791  ret <4 x float> %v
792}
793
; sample.cd on a 2D image with six half operands.
; Both targets are expected to emit the same packing sequence (three pairs
; packed into v2, v3 and v4) and to sample from the contiguous range v[2:4].
794define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
795; GFX9-LABEL: sample_cd_2d:
796; GFX9:       ; %bb.0: ; %main_body
797; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
798; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
799; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
800; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
801; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
802; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
803; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
804; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
805; GFX9-NEXT:    s_waitcnt vmcnt(0)
806; GFX9-NEXT:    ; return to shader part epilog
807;
808; GFX10-LABEL: sample_cd_2d:
809; GFX10:       ; %bb.0: ; %main_body
810; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
811; GFX10-NEXT:    v_and_b32_e32 v4, v6, v4
812; GFX10-NEXT:    v_and_b32_e32 v2, v6, v2
813; GFX10-NEXT:    v_and_b32_e32 v0, v6, v0
814; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
815; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
816; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
817; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
818; GFX10-NEXT:    s_waitcnt vmcnt(0)
819; GFX10-NEXT:    ; return to shader part epilog
820main_body:
821  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
822  ret <4 x float> %v
823}
824
; sample.c.cd on a 1D image: an f32 %zcompare followed by the half operands
; %dsdh, %dsdv and %s. No packing instructions are expected; the address
; operand is v[0:3] with the a16 modifier (image_sample_c_cd on gfx900,
; image_sample_c_cd_g16 on gfx1010).
825define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
826; GFX9-LABEL: sample_c_cd_1d:
827; GFX9:       ; %bb.0: ; %main_body
828; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
829; GFX9-NEXT:    s_waitcnt vmcnt(0)
830; GFX9-NEXT:    ; return to shader part epilog
831;
832; GFX10-LABEL: sample_c_cd_1d:
833; GFX10:       ; %bb.0: ; %main_body
834; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
835; GFX10-NEXT:    s_waitcnt vmcnt(0)
836; GFX10-NEXT:    ; return to shader part epilog
837main_body:
838  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
839  ret <4 x float> %v
840}
841
; sample.c.cd on a 2D image: an f32 %zcompare followed by six half operands.
; The expected code packs the halves pairwise into 32-bit registers (v_and_b32
; with 0xffff, then v_lshl_or_b32 by 16). gfx900 samples from the contiguous
; range v[0:3]; gfx1010 packs in place and passes the register list
; [v0, v1, v3, v5].
842define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
843; GFX9-LABEL: sample_c_cd_2d:
844; GFX9:       ; %bb.0: ; %main_body
845; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
846; GFX9-NEXT:    v_mov_b32_e32 v7, v3
847; GFX9-NEXT:    v_mov_b32_e32 v8, v2
848; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
849; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
850; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
851; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
852; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
853; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
854; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
855; GFX9-NEXT:    s_waitcnt vmcnt(0)
856; GFX9-NEXT:    ; return to shader part epilog
857;
858; GFX10-LABEL: sample_c_cd_2d:
859; GFX10:       ; %bb.0: ; %main_body
860; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
861; GFX10-NEXT:    v_and_b32_e32 v5, v7, v5
862; GFX10-NEXT:    v_and_b32_e32 v3, v7, v3
863; GFX10-NEXT:    v_and_b32_e32 v1, v7, v1
864; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
865; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
866; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
867; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
868; GFX10-NEXT:    s_waitcnt vmcnt(0)
869; GFX10-NEXT:    ; return to shader part epilog
870main_body:
871  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
872  ret <4 x float> %v
873}
874
; sample.cd.cl on a 1D image with the half operands %dsdh, %dsdv, %s and
; %clamp. Only v2 and v3 are expected to be packed into one register; the
; address operand is v[0:2] on both targets.
875define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
876; GFX9-LABEL: sample_cd_cl_1d:
877; GFX9:       ; %bb.0: ; %main_body
878; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
879; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
880; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
881; GFX9-NEXT:    s_waitcnt vmcnt(0)
882; GFX9-NEXT:    ; return to shader part epilog
883;
884; GFX10-LABEL: sample_cd_cl_1d:
885; GFX10:       ; %bb.0: ; %main_body
886; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
887; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
888; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
889; GFX10-NEXT:    s_waitcnt vmcnt(0)
890; GFX10-NEXT:    ; return to shader part epilog
891main_body:
892  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
893  ret <4 x float> %v
894}
895
; sample.cd.cl on a 2D image with seven half operands.
; Three register pairs (v0/v1, v2/v3, v4/v5) are expected to be packed while v6
; is used unpacked. gfx900 builds the contiguous range v[3:6]; gfx1010 packs in
; place and passes the register list [v0, v2, v4, v6].
896define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
897; GFX9-LABEL: sample_cd_cl_2d:
898; GFX9:       ; %bb.0: ; %main_body
899; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
900; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
901; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
902; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
903; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
904; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
905; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
906; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
907; GFX9-NEXT:    s_waitcnt vmcnt(0)
908; GFX9-NEXT:    ; return to shader part epilog
909;
910; GFX10-LABEL: sample_cd_cl_2d:
911; GFX10:       ; %bb.0: ; %main_body
912; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
913; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
914; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
915; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
916; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
917; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
918; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
919; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
920; GFX10-NEXT:    s_waitcnt vmcnt(0)
921; GFX10-NEXT:    ; return to shader part epilog
922main_body:
923  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
924  ret <4 x float> %v
925}
926
; sample.c.cd.cl on a 1D image: an f32 %zcompare followed by the half operands
; %dsdh, %dsdv, %s and %clamp. Only v3 and v4 are expected to be packed into
; one register; the address operand is v[0:3] on both targets.
927define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
928; GFX9-LABEL: sample_c_cd_cl_1d:
929; GFX9:       ; %bb.0: ; %main_body
930; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
931; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
932; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
933; GFX9-NEXT:    s_waitcnt vmcnt(0)
934; GFX9-NEXT:    ; return to shader part epilog
935;
936; GFX10-LABEL: sample_c_cd_cl_1d:
937; GFX10:       ; %bb.0: ; %main_body
938; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
939; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
940; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
941; GFX10-NEXT:    s_waitcnt vmcnt(0)
942; GFX10-NEXT:    ; return to shader part epilog
943main_body:
944  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
945  ret <4 x float> %v
946}
947
; sample.c.cd.cl on a 2D image: an f32 %zcompare followed by seven half
; operands. gfx900 is expected to copy v0 and v7 and pack three pairs so that
; the address forms the contiguous range v[7:11]; gfx1010 packs in place and
; passes the register list [v0, v1, v3, v5, v7].
948define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
949; GFX9-LABEL: sample_c_cd_cl_2d:
950; GFX9:       ; %bb.0: ; %main_body
951; GFX9-NEXT:    v_mov_b32_e32 v11, v7
952; GFX9-NEXT:    v_mov_b32_e32 v7, v0
953; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
954; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
955; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
956; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
957; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
958; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
959; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
960; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
961; GFX9-NEXT:    s_waitcnt vmcnt(0)
962; GFX9-NEXT:    ; return to shader part epilog
963;
964; GFX10-LABEL: sample_c_cd_cl_2d:
965; GFX10:       ; %bb.0: ; %main_body
966; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
967; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
968; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
969; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
970; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
971; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
972; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
973; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
974; GFX10-NEXT:    s_waitcnt vmcnt(0)
975; GFX10-NEXT:    ; return to shader part epilog
976main_body:
977  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
978  ret <4 x float> %v
979}
980
; sample.l on a 1D image with the half operands %s and %lod.
; v0 and v1 are expected to be packed into v0, which is the only address
; register. Both targets use the image_sample_l mnemonic; gfx1010 additionally
; prints dim:SQ_RSRC_IMG_1D.
981define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
982; GFX9-LABEL: sample_l_1d:
983; GFX9:       ; %bb.0: ; %main_body
984; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
985; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
986; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
987; GFX9-NEXT:    s_waitcnt vmcnt(0)
988; GFX9-NEXT:    ; return to shader part epilog
989;
990; GFX10-LABEL: sample_l_1d:
991; GFX10:       ; %bb.0: ; %main_body
992; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
993; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
994; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
995; GFX10-NEXT:    s_waitcnt vmcnt(0)
996; GFX10-NEXT:    ; return to shader part epilog
997main_body:
998  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
999  ret <4 x float> %v
1000}
1001
; sample.l on a 2D image with the half operands %s, %t and %lod.
; v0 and v1 are expected to be packed into v1 so that, together with the
; unpacked v2, the address forms the contiguous range v[1:2] on both targets.
1002define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
1003; GFX9-LABEL: sample_l_2d:
1004; GFX9:       ; %bb.0: ; %main_body
1005; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1006; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
1007; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
1008; GFX9-NEXT:    s_waitcnt vmcnt(0)
1009; GFX9-NEXT:    ; return to shader part epilog
1010;
1011; GFX10-LABEL: sample_l_2d:
1012; GFX10:       ; %bb.0: ; %main_body
1013; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1014; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
1015; GFX10-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1016; GFX10-NEXT:    s_waitcnt vmcnt(0)
1017; GFX10-NEXT:    ; return to shader part epilog
1018main_body:
1019  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1020  ret <4 x float> %v
1021}
1022
; sample.c.l on a 1D image: an f32 %zcompare followed by the half operands %s
; and %lod. v1 and v2 are expected to be packed into v1; the address operand is
; v[0:1] on both targets.
1023define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
1024; GFX9-LABEL: sample_c_l_1d:
1025; GFX9:       ; %bb.0: ; %main_body
1026; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1027; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1028; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1029; GFX9-NEXT:    s_waitcnt vmcnt(0)
1030; GFX9-NEXT:    ; return to shader part epilog
1031;
1032; GFX10-LABEL: sample_c_l_1d:
1033; GFX10:       ; %bb.0: ; %main_body
1034; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1035; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1036; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1037; GFX10-NEXT:    s_waitcnt vmcnt(0)
1038; GFX10-NEXT:    ; return to shader part epilog
1039main_body:
1040  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1041  ret <4 x float> %v
1042}
1043
; sample.c.l on a 2D image: an f32 %zcompare followed by the half operands %s,
; %t and %lod. gfx900 is expected to copy v0 and v3 and pack v1/v2 into v4 so
; that the address forms the contiguous range v[3:5]; gfx1010 packs v1/v2 in
; place and passes the register list [v0, v1, v3].
1044define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
1045; GFX9-LABEL: sample_c_l_2d:
1046; GFX9:       ; %bb.0: ; %main_body
1047; GFX9-NEXT:    v_mov_b32_e32 v5, v3
1048; GFX9-NEXT:    v_mov_b32_e32 v3, v0
1049; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
1050; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
1051; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
1052; GFX9-NEXT:    s_waitcnt vmcnt(0)
1053; GFX9-NEXT:    ; return to shader part epilog
1054;
1055; GFX10-LABEL: sample_c_l_2d:
1056; GFX10:       ; %bb.0: ; %main_body
1057; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1058; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1059; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1060; GFX10-NEXT:    s_waitcnt vmcnt(0)
1061; GFX10-NEXT:    ; return to shader part epilog
1062main_body:
1063  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1064  ret <4 x float> %v
1065}
1066
; sample.lz on a 1D image with a single half operand %s.
; No packing instructions are expected; the address operand is the single
; register v0 and the a16 modifier is set on both targets.
1067define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
1068; GFX9-LABEL: sample_lz_1d:
1069; GFX9:       ; %bb.0: ; %main_body
1070; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
1071; GFX9-NEXT:    s_waitcnt vmcnt(0)
1072; GFX9-NEXT:    ; return to shader part epilog
1073;
1074; GFX10-LABEL: sample_lz_1d:
1075; GFX10:       ; %bb.0: ; %main_body
1076; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1077; GFX10-NEXT:    s_waitcnt vmcnt(0)
1078; GFX10-NEXT:    ; return to shader part epilog
1079main_body:
1080  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1081  ret <4 x float> %v
1082}
1083
; sample.lz on a 2D image with the half operands %s and %t.
; v0 and v1 are expected to be packed into v0, which is the only address
; register on both targets.
1084define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
1085; GFX9-LABEL: sample_lz_2d:
1086; GFX9:       ; %bb.0: ; %main_body
1087; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1088; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
1089; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
1090; GFX9-NEXT:    s_waitcnt vmcnt(0)
1091; GFX9-NEXT:    ; return to shader part epilog
1092;
1093; GFX10-LABEL: sample_lz_2d:
1094; GFX10:       ; %bb.0: ; %main_body
1095; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1096; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
1097; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1098; GFX10-NEXT:    s_waitcnt vmcnt(0)
1099; GFX10-NEXT:    ; return to shader part epilog
1100main_body:
1101  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1102  ret <4 x float> %v
1103}
1104
; sample.c.lz on a 1D image: an f32 %zcompare followed by a single half
; operand %s. No packing instructions are expected; the address operand is
; v[0:1] on both targets.
1105define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
1106; GFX9-LABEL: sample_c_lz_1d:
1107; GFX9:       ; %bb.0: ; %main_body
1108; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1109; GFX9-NEXT:    s_waitcnt vmcnt(0)
1110; GFX9-NEXT:    ; return to shader part epilog
1111;
1112; GFX10-LABEL: sample_c_lz_1d:
1113; GFX10:       ; %bb.0: ; %main_body
1114; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
1115; GFX10-NEXT:    s_waitcnt vmcnt(0)
1116; GFX10-NEXT:    ; return to shader part epilog
1117main_body:
1118  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1119  ret <4 x float> %v
1120}
1121
; sample.c.lz on a 2D image: an f32 %zcompare followed by the half operands %s
; and %t. v1 and v2 are expected to be packed into v1; the address operand is
; v[0:1] on both targets.
1122define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
1123; GFX9-LABEL: sample_c_lz_2d:
1124; GFX9:       ; %bb.0: ; %main_body
1125; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1126; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1127; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
1128; GFX9-NEXT:    s_waitcnt vmcnt(0)
1129; GFX9-NEXT:    ; return to shader part epilog
1130;
1131; GFX10-LABEL: sample_c_lz_2d:
1132; GFX10:       ; %bb.0: ; %main_body
1133; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1134; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
1135; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
1136; GFX10-NEXT:    s_waitcnt vmcnt(0)
1137; GFX10-NEXT:    ; return to shader part epilog
1138main_body:
1139  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1140  ret <4 x float> %v
1141}
1142
; sample.c.d.o on a 2D array image returning a single float (dmask argument 4,
; printed as dmask:0x4). Operands are an i32 %offset, an f32 %zcompare and
; seven halves. Both targets are expected to pack three pairs and sample from
; the contiguous range v[8:13] into v0; gfx900 prints the "da" modifier while
; gfx1010 prints dim:SQ_RSRC_IMG_2D_ARRAY.
1143define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
1144; GFX9-LABEL: sample_c_d_o_2darray_V1:
1145; GFX9:       ; %bb.0: ; %main_body
1146; GFX9-NEXT:    v_mov_b32_e32 v13, v8
1147; GFX9-NEXT:    v_mov_b32_e32 v8, v0
1148; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
1149; GFX9-NEXT:    v_mov_b32_e32 v9, v1
1150; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
1151; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1152; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
1153; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
1154; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
1155; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1156; GFX9-NEXT:    image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
1157; GFX9-NEXT:    s_waitcnt vmcnt(0)
1158; GFX9-NEXT:    ; return to shader part epilog
1159;
1160; GFX10-LABEL: sample_c_d_o_2darray_V1:
1161; GFX10:       ; %bb.0: ; %main_body
1162; GFX10-NEXT:    v_mov_b32_e32 v13, v8
1163; GFX10-NEXT:    v_mov_b32_e32 v8, v0
1164; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
1165; GFX10-NEXT:    v_mov_b32_e32 v9, v1
1166; GFX10-NEXT:    v_and_b32_e32 v1, v0, v6
1167; GFX10-NEXT:    v_and_b32_e32 v4, v0, v4
1168; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
1169; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1170; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v4
1171; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1172; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
1173; GFX10-NEXT:    s_waitcnt vmcnt(0)
1174; GFX10-NEXT:    ; return to shader part epilog
1175main_body:
1176  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1177  ret float %v
1178}
1179
; sample.c.d.o on a 2D array image returning <2 x float> (dmask argument 6,
; printed as dmask:0x6). Operands are an i32 %offset, an f32 %zcompare and
; seven halves. Both targets are expected to pack three pairs and sample from
; the contiguous range v[8:13] into v[0:1]; gfx900 prints the "da" modifier
; while gfx1010 prints dim:SQ_RSRC_IMG_2D_ARRAY.
1180define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
1181; GFX9-LABEL: sample_c_d_o_2darray_V2:
1182; GFX9:       ; %bb.0: ; %main_body
1183; GFX9-NEXT:    v_mov_b32_e32 v13, v8
1184; GFX9-NEXT:    v_mov_b32_e32 v8, v0
1185; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
1186; GFX9-NEXT:    v_mov_b32_e32 v9, v1
1187; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
1188; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1189; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
1190; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
1191; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
1192; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1193; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
1194; GFX9-NEXT:    s_waitcnt vmcnt(0)
1195; GFX9-NEXT:    ; return to shader part epilog
1196;
1197; GFX10-LABEL: sample_c_d_o_2darray_V2:
1198; GFX10:       ; %bb.0: ; %main_body
1199; GFX10-NEXT:    v_mov_b32_e32 v13, v8
1200; GFX10-NEXT:    v_mov_b32_e32 v8, v0
1201; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
1202; GFX10-NEXT:    v_mov_b32_e32 v9, v1
1203; GFX10-NEXT:    v_and_b32_e32 v1, v0, v6
1204; GFX10-NEXT:    v_and_b32_e32 v4, v0, v4
1205; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
1206; GFX10-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
1207; GFX10-NEXT:    v_lshl_or_b32 v11, v5, 16, v4
1208; GFX10-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
1209; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
1210; GFX10-NEXT:    s_waitcnt vmcnt(0)
1211; GFX10-NEXT:    ; return to shader part epilog
1212main_body:
1213  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1214  ret <2 x float> %v
1215}
1216
1217declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1218declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1219declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1220declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1221declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1222declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1223declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1224
1225declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1226declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1227declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1228declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1229declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1230declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1231
1232declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1233declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1234declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1235declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1236declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1237declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1238declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1239declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1240
; sample.d, sample.c.d, sample.d.cl and sample.c.d.cl intrinsics.
; Plain sample.d is declared for 1d, 2d and 3d with 3, 6 and 9 half operands
; respectively; the other forms are declared for 1d and 2d only. The .cl forms
; take one more half operand than their non-.cl counterparts, and the .c forms
; take one float operand directly after the leading i32.
; Name suffixes: .f16.f16 on the non-.c forms, .f32.f16 on the .c forms.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.cd, sample.c.cd, sample.cd.cl and sample.c.cd.cl intrinsics for the 1d
; and 2d dimensions. The 1d forms take 3 half operands and the 2d forms take 6;
; the .cl forms take one more half, and the .c forms take one float operand
; directly after the leading i32.
; Name suffixes: .f16.f16 on the non-.c forms, .f32.f16 on the .c forms.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.l and sample.c.l intrinsics for the 1d and 2d dimensions, .f16 suffix.
; The 1d forms take 2 half operands and the 2d forms take 3; the .c forms take
; one float operand directly after the leading i32.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.lz and sample.c.lz intrinsics for the 1d and 2d dimensions, .f16
; suffix. The 1d forms take 1 half operand and the 2d forms take 2; the .c
; forms take one float operand directly after the leading i32.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c.d.o intrinsics for the 2darray dimension, declared once returning a
; scalar float and once returning <2 x float>. Both take two leading i32
; operands, then one float, then 7 half operands.
; REVIEW: the scalar form's name ends in .f32.f16.f16 while the vector form's
; ends in .v2f32.f32.f16, so the type suffixes do not follow the same pattern.
; The names are kept as-is because they must match the call sites; confirm
; whether the difference is intentional.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. #1 is the group attached to the image-sample intrinsic
; declarations in this part of the file. #0 and #2 are not referenced by those
; declarations; presumably they are used elsewhere in the file -- verify
; before removing either one.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }