1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
6
; Baseline 1D sample: calls llvm.amdgcn.image.sample.1d with a dmask of 15
; and zero for all three trailing control operands. Every run line expects
; exec to be widened with s_wqm and then and-ed back with the saved mask,
; followed by one image_sample with dmask:0xf writing v[0:3]. The GFX10PLUS
; checks additionally expect the dim:SQ_RSRC_IMG_1D modifier.
7define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
8; VERDE-LABEL: sample_1d:
9; VERDE:       ; %bb.0: ; %main_body
10; VERDE-NEXT:    s_mov_b64 s[12:13], exec
11; VERDE-NEXT:    s_wqm_b64 exec, exec
12; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
13; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
14; VERDE-NEXT:    s_waitcnt vmcnt(0)
15; VERDE-NEXT:    ; return to shader part epilog
16;
17; GFX6789-LABEL: sample_1d:
18; GFX6789:       ; %bb.0: ; %main_body
19; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
20; GFX6789-NEXT:    s_wqm_b64 exec, exec
21; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
22; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
23; GFX6789-NEXT:    s_waitcnt vmcnt(0)
24; GFX6789-NEXT:    ; return to shader part epilog
25;
26; GFX10PLUS-LABEL: sample_1d:
27; GFX10PLUS:       ; %bb.0: ; %main_body
28; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
29; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
30; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
31; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
32; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
33; GFX10PLUS-NEXT:    ; return to shader part epilog
34main_body:
35  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
36  ret <4 x float> %v
37}
38
; 1D sample with the second-to-last i32 operand of the intrinsic set to 1.
; This variant of the intrinsic returns {<4 x float>, i32}; the vector is
; returned from the shader and the extra i32 is stored to %out.
; The checks expect the `tfe` modifier on image_sample, a five-register
; destination v[0:4] whose registers are all written with zero before the
; sample is issued, and a store of v4 (the fifth result register) to the
; address passed in s[12:13].
39define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
40; VERDE-LABEL: sample_1d_tfe:
41; VERDE:       ; %bb.0: ; %main_body
42; VERDE-NEXT:    s_mov_b64 s[14:15], exec
43; VERDE-NEXT:    s_wqm_b64 exec, exec
44; VERDE-NEXT:    v_mov_b32_e32 v5, v0
45; VERDE-NEXT:    v_mov_b32_e32 v0, 0
46; VERDE-NEXT:    v_mov_b32_e32 v1, v0
47; VERDE-NEXT:    v_mov_b32_e32 v2, v0
48; VERDE-NEXT:    v_mov_b32_e32 v3, v0
49; VERDE-NEXT:    v_mov_b32_e32 v4, v0
50; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
51; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
52; VERDE-NEXT:    s_mov_b32 s15, 0xf000
53; VERDE-NEXT:    s_mov_b32 s14, -1
54; VERDE-NEXT:    s_waitcnt vmcnt(0)
55; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
56; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
57; VERDE-NEXT:    ; return to shader part epilog
58;
59; GFX6789-LABEL: sample_1d_tfe:
60; GFX6789:       ; %bb.0: ; %main_body
61; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
62; GFX6789-NEXT:    s_wqm_b64 exec, exec
63; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
64; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
65; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
66; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
67; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
68; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
69; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
70; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
71; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
72; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
73; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
74; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
75; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
76; GFX6789-NEXT:    s_waitcnt vmcnt(0)
77; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
78; GFX6789-NEXT:    s_waitcnt vmcnt(0)
79; GFX6789-NEXT:    ; return to shader part epilog
80;
81; GFX10-LABEL: sample_1d_tfe:
82; GFX10:       ; %bb.0: ; %main_body
83; GFX10-NEXT:    s_mov_b32 s14, exec_lo
84; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
85; GFX10-NEXT:    v_mov_b32_e32 v6, 0
86; GFX10-NEXT:    v_mov_b32_e32 v5, v0
87; GFX10-NEXT:    v_mov_b32_e32 v7, v6
88; GFX10-NEXT:    v_mov_b32_e32 v8, v6
89; GFX10-NEXT:    v_mov_b32_e32 v9, v6
90; GFX10-NEXT:    v_mov_b32_e32 v10, v6
91; GFX10-NEXT:    v_mov_b32_e32 v0, v6
92; GFX10-NEXT:    v_mov_b32_e32 v1, v7
93; GFX10-NEXT:    v_mov_b32_e32 v2, v8
94; GFX10-NEXT:    v_mov_b32_e32 v3, v9
95; GFX10-NEXT:    v_mov_b32_e32 v4, v10
96; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
97; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
98; GFX10-NEXT:    s_waitcnt vmcnt(0)
99; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
100; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
101; GFX10-NEXT:    ; return to shader part epilog
102;
103; GFX11-LABEL: sample_1d_tfe:
104; GFX11:       ; %bb.0: ; %main_body
105; GFX11-NEXT:    s_mov_b32 s14, exec_lo
106; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
107; GFX11-NEXT:    v_mov_b32_e32 v6, 0
108; GFX11-NEXT:    v_mov_b32_e32 v5, v0
109; GFX11-NEXT:    v_mov_b32_e32 v7, v6
110; GFX11-NEXT:    v_mov_b32_e32 v8, v6
111; GFX11-NEXT:    v_mov_b32_e32 v9, v6
112; GFX11-NEXT:    v_mov_b32_e32 v10, v6
113; GFX11-NEXT:    v_mov_b32_e32 v0, v6
114; GFX11-NEXT:    v_mov_b32_e32 v1, v7
115; GFX11-NEXT:    v_mov_b32_e32 v2, v8
116; GFX11-NEXT:    v_mov_b32_e32 v3, v9
117; GFX11-NEXT:    v_mov_b32_e32 v4, v10
118; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
119; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
120; GFX11-NEXT:    s_waitcnt vmcnt(0)
121; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
122; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
123; GFX11-NEXT:    ; return to shader part epilog
124main_body:
125  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
126  %v.vec = extractvalue {<4 x float>, i32} %v, 0
127  %v.err = extractvalue {<4 x float>, i32} %v, 1
128  store i32 %v.err, i32 addrspace(1)* %out, align 4
129  ret <4 x float> %v.vec
130}
131
; TFE sample where only element 0 of the sampled vector and the extra i32
; result are used (packed into the returned <2 x float>). The IR requests a
; dmask of 15, but the checks expect the emitted instruction to be narrowed
; to dmask:0x1 with a two-register destination v[0:1], both zeroed first.
; NOTE(review): %out is declared in the signature but never referenced in
; the body below -- confirm whether the parameter is intentional.
132define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
133; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
134; VERDE:       ; %bb.0: ; %main_body
135; VERDE-NEXT:    s_mov_b64 s[12:13], exec
136; VERDE-NEXT:    s_wqm_b64 exec, exec
137; VERDE-NEXT:    v_mov_b32_e32 v2, v0
138; VERDE-NEXT:    v_mov_b32_e32 v0, 0
139; VERDE-NEXT:    v_mov_b32_e32 v1, v0
140; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
141; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
142; VERDE-NEXT:    s_waitcnt vmcnt(0)
143; VERDE-NEXT:    ; return to shader part epilog
144;
145; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
146; GFX6789:       ; %bb.0: ; %main_body
147; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
148; GFX6789-NEXT:    s_wqm_b64 exec, exec
149; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
150; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
151; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
152; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
153; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
154; GFX6789-NEXT:    s_waitcnt vmcnt(0)
155; GFX6789-NEXT:    ; return to shader part epilog
156;
157; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
158; GFX10PLUS:       ; %bb.0: ; %main_body
159; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
160; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
161; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
162; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
163; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
164; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
165; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
166; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
167; GFX10PLUS-NEXT:    ; return to shader part epilog
168main_body:
169  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
170  %res.vec = extractvalue {<4 x float>,i32} %v, 0
171  %res.f = extractelement <4 x float> %res.vec, i32 0
172  %res.err = extractvalue {<4 x float>,i32} %v, 1
173  %res.errf = bitcast i32 %res.err to float
174  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
175  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
176  ret <2 x float> %res
177}
178
; TFE sample where only element 1 of the sampled vector and the extra i32
; result are used (packed into the returned <2 x float>). The IR requests a
; dmask of 15, but the checks expect the emitted instruction to be narrowed
; to dmask:0x2 with a two-register destination v[0:1], both zeroed first.
179define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
180; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
181; VERDE:       ; %bb.0: ; %main_body
182; VERDE-NEXT:    s_mov_b64 s[12:13], exec
183; VERDE-NEXT:    s_wqm_b64 exec, exec
184; VERDE-NEXT:    v_mov_b32_e32 v2, v0
185; VERDE-NEXT:    v_mov_b32_e32 v0, 0
186; VERDE-NEXT:    v_mov_b32_e32 v1, v0
187; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
188; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
189; VERDE-NEXT:    s_waitcnt vmcnt(0)
190; VERDE-NEXT:    ; return to shader part epilog
191;
192; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
193; GFX6789:       ; %bb.0: ; %main_body
194; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
195; GFX6789-NEXT:    s_wqm_b64 exec, exec
196; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
197; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
198; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
199; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
200; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
201; GFX6789-NEXT:    s_waitcnt vmcnt(0)
202; GFX6789-NEXT:    ; return to shader part epilog
203;
204; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
205; GFX10PLUS:       ; %bb.0: ; %main_body
206; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
207; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
208; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
209; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
210; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
211; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
212; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
213; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
214; GFX10PLUS-NEXT:    ; return to shader part epilog
215main_body:
216  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
217  %res.vec = extractvalue {<4 x float>,i32} %v, 0
218  %res.f = extractelement <4 x float> %res.vec, i32 1
219  %res.err = extractvalue {<4 x float>,i32} %v, 1
220  %res.errf = bitcast i32 %res.err to float
221  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
222  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
223  ret <2 x float> %res
224}
225
; TFE sample where only element 2 of the sampled vector and the extra i32
; result are used (packed into the returned <2 x float>). The IR requests a
; dmask of 15, but the checks expect the emitted instruction to be narrowed
; to dmask:0x4 with a two-register destination v[0:1], both zeroed first.
226define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
227; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
228; VERDE:       ; %bb.0: ; %main_body
229; VERDE-NEXT:    s_mov_b64 s[12:13], exec
230; VERDE-NEXT:    s_wqm_b64 exec, exec
231; VERDE-NEXT:    v_mov_b32_e32 v2, v0
232; VERDE-NEXT:    v_mov_b32_e32 v0, 0
233; VERDE-NEXT:    v_mov_b32_e32 v1, v0
234; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
235; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
236; VERDE-NEXT:    s_waitcnt vmcnt(0)
237; VERDE-NEXT:    ; return to shader part epilog
238;
239; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
240; GFX6789:       ; %bb.0: ; %main_body
241; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
242; GFX6789-NEXT:    s_wqm_b64 exec, exec
243; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
244; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
245; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
246; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
247; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
248; GFX6789-NEXT:    s_waitcnt vmcnt(0)
249; GFX6789-NEXT:    ; return to shader part epilog
250;
251; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
252; GFX10PLUS:       ; %bb.0: ; %main_body
253; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
254; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
255; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
256; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
257; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
258; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
259; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
260; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
261; GFX10PLUS-NEXT:    ; return to shader part epilog
262main_body:
263  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
264  %res.vec = extractvalue {<4 x float>,i32} %v, 0
265  %res.f = extractelement <4 x float> %res.vec, i32 2
266  %res.err = extractvalue {<4 x float>,i32} %v, 1
267  %res.errf = bitcast i32 %res.err to float
268  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
269  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
270  ret <2 x float> %res
271}
272
; TFE sample where only element 3 of the sampled vector and the extra i32
; result are used (packed into the returned <2 x float>). The IR requests a
; dmask of 15, but the checks expect the emitted instruction to be narrowed
; to dmask:0x8 with a two-register destination v[0:1], both zeroed first.
273define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
274; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
275; VERDE:       ; %bb.0: ; %main_body
276; VERDE-NEXT:    s_mov_b64 s[12:13], exec
277; VERDE-NEXT:    s_wqm_b64 exec, exec
278; VERDE-NEXT:    v_mov_b32_e32 v2, v0
279; VERDE-NEXT:    v_mov_b32_e32 v0, 0
280; VERDE-NEXT:    v_mov_b32_e32 v1, v0
281; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
282; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
283; VERDE-NEXT:    s_waitcnt vmcnt(0)
284; VERDE-NEXT:    ; return to shader part epilog
285;
286; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
287; GFX6789:       ; %bb.0: ; %main_body
288; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
289; GFX6789-NEXT:    s_wqm_b64 exec, exec
290; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
291; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
292; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
293; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
294; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
295; GFX6789-NEXT:    s_waitcnt vmcnt(0)
296; GFX6789-NEXT:    ; return to shader part epilog
297;
298; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
299; GFX10PLUS:       ; %bb.0: ; %main_body
300; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
301; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
302; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
303; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
304; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
305; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
306; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
307; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
308; GFX10PLUS-NEXT:    ; return to shader part epilog
309main_body:
310  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
311  %res.vec = extractvalue {<4 x float>,i32} %v, 0
312  %res.f = extractelement <4 x float> %res.vec, i32 3
313  %res.err = extractvalue {<4 x float>,i32} %v, 1
314  %res.errf = bitcast i32 %res.err to float
315  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
316  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
317  ret <2 x float> %res
318}
319
; TFE sample where elements 0 and 1 of the sampled vector plus the extra i32
; result are used; they fill lanes 0..2 of the returned <4 x float> (lane 3
; is left undef). The IR requests a dmask of 15, but the checks expect
; dmask:0x3 with a three-register destination v[0:2], all zeroed first.
320define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
321; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
322; VERDE:       ; %bb.0: ; %main_body
323; VERDE-NEXT:    s_mov_b64 s[12:13], exec
324; VERDE-NEXT:    s_wqm_b64 exec, exec
325; VERDE-NEXT:    v_mov_b32_e32 v3, v0
326; VERDE-NEXT:    v_mov_b32_e32 v0, 0
327; VERDE-NEXT:    v_mov_b32_e32 v1, v0
328; VERDE-NEXT:    v_mov_b32_e32 v2, v0
329; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
330; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
331; VERDE-NEXT:    s_waitcnt vmcnt(0)
332; VERDE-NEXT:    ; return to shader part epilog
333;
334; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
335; GFX6789:       ; %bb.0: ; %main_body
336; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
337; GFX6789-NEXT:    s_wqm_b64 exec, exec
338; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
339; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
340; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
341; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
342; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
343; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
344; GFX6789-NEXT:    s_waitcnt vmcnt(0)
345; GFX6789-NEXT:    ; return to shader part epilog
346;
347; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_12:
348; GFX10PLUS:       ; %bb.0: ; %main_body
349; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
350; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
351; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
352; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
353; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
354; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
355; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
356; GFX10PLUS-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
357; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
358; GFX10PLUS-NEXT:    ; return to shader part epilog
359main_body:
360  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
361  %res.vec = extractvalue {<4 x float>,i32} %v, 0
362  %res.f1 = extractelement <4 x float> %res.vec, i32 0
363  %res.f2 = extractelement <4 x float> %res.vec, i32 1
364  %res.err = extractvalue {<4 x float>,i32} %v, 1
365  %res.errf = bitcast i32 %res.err to float
366  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
367  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
368  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
369  ret <4 x float> %res
370}
371
; TFE sample where elements 1 and 3 of the sampled vector plus the extra i32
; result are used; they fill lanes 0..2 of the returned <4 x float> (lane 3
; is left undef). The IR requests a dmask of 15, but the checks expect the
; non-contiguous dmask:0xa with a three-register destination v[0:2], all
; zeroed first.
372define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
373; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
374; VERDE:       ; %bb.0: ; %main_body
375; VERDE-NEXT:    s_mov_b64 s[12:13], exec
376; VERDE-NEXT:    s_wqm_b64 exec, exec
377; VERDE-NEXT:    v_mov_b32_e32 v3, v0
378; VERDE-NEXT:    v_mov_b32_e32 v0, 0
379; VERDE-NEXT:    v_mov_b32_e32 v1, v0
380; VERDE-NEXT:    v_mov_b32_e32 v2, v0
381; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
382; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
383; VERDE-NEXT:    s_waitcnt vmcnt(0)
384; VERDE-NEXT:    ; return to shader part epilog
385;
386; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
387; GFX6789:       ; %bb.0: ; %main_body
388; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
389; GFX6789-NEXT:    s_wqm_b64 exec, exec
390; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
391; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
392; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
393; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
394; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
395; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
396; GFX6789-NEXT:    s_waitcnt vmcnt(0)
397; GFX6789-NEXT:    ; return to shader part epilog
398;
399; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_24:
400; GFX10PLUS:       ; %bb.0: ; %main_body
401; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
402; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
403; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
404; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
405; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
406; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
407; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
408; GFX10PLUS-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
409; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
410; GFX10PLUS-NEXT:    ; return to shader part epilog
411main_body:
412  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
413  %res.vec = extractvalue {<4 x float>,i32} %v, 0
414  %res.f1 = extractelement <4 x float> %res.vec, i32 1
415  %res.f2 = extractelement <4 x float> %res.vec, i32 3
416  %res.err = extractvalue {<4 x float>,i32} %v, 1
417  %res.errf = bitcast i32 %res.err to float
418  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
419  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
420  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
421  ret <4 x float> %res
422}
423
; TFE sample where elements 0, 2 and 3 of the sampled vector plus the extra
; i32 result are used; together they fill all four lanes of the returned
; <4 x float>. The IR requests a dmask of 15, but the checks expect
; dmask:0xd with a four-register destination v[0:3], all zeroed first.
424define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
425; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
426; VERDE:       ; %bb.0: ; %main_body
427; VERDE-NEXT:    s_mov_b64 s[12:13], exec
428; VERDE-NEXT:    s_wqm_b64 exec, exec
429; VERDE-NEXT:    v_mov_b32_e32 v4, v0
430; VERDE-NEXT:    v_mov_b32_e32 v0, 0
431; VERDE-NEXT:    v_mov_b32_e32 v1, v0
432; VERDE-NEXT:    v_mov_b32_e32 v2, v0
433; VERDE-NEXT:    v_mov_b32_e32 v3, v0
434; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
435; VERDE-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
436; VERDE-NEXT:    s_waitcnt vmcnt(0)
437; VERDE-NEXT:    ; return to shader part epilog
438;
439; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
440; GFX6789:       ; %bb.0: ; %main_body
441; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
442; GFX6789-NEXT:    s_wqm_b64 exec, exec
443; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
444; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
445; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
446; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
447; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
448; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
449; GFX6789-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
450; GFX6789-NEXT:    s_waitcnt vmcnt(0)
451; GFX6789-NEXT:    ; return to shader part epilog
452;
453; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
454; GFX10PLUS:       ; %bb.0: ; %main_body
455; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
456; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
457; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
458; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
459; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
460; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
461; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
462; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
463; GFX10PLUS-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
464; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
465; GFX10PLUS-NEXT:    ; return to shader part epilog
466main_body:
467  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
468  %res.vec = extractvalue {<4 x float>,i32} %v, 0
469  %res.f1 = extractelement <4 x float> %res.vec, i32 0
470  %res.f2 = extractelement <4 x float> %res.vec, i32 2
471  %res.f3 = extractelement <4 x float> %res.vec, i32 3
472  %res.err = extractvalue {<4 x float>,i32} %v, 1
473  %res.errf = bitcast i32 %res.err to float
474  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
475  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
476  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
477  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
478  ret <4 x float> %res
479}
480
; 1D sample with the second-to-last i32 operand of the intrinsic set to 2.
; This variant of the intrinsic returns {<4 x float>, i32}; the vector is
; returned from the shader and the extra i32 is stored to %out.
; The checks expect the `lwe` modifier on image_sample, a five-register
; destination v[0:4] whose registers are all written with zero before the
; sample is issued, and a store of v4 (the fifth result register) to the
; address passed in s[12:13].
481define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
482; VERDE-LABEL: sample_1d_lwe:
483; VERDE:       ; %bb.0: ; %main_body
484; VERDE-NEXT:    s_mov_b64 s[14:15], exec
485; VERDE-NEXT:    s_wqm_b64 exec, exec
486; VERDE-NEXT:    v_mov_b32_e32 v5, v0
487; VERDE-NEXT:    v_mov_b32_e32 v0, 0
488; VERDE-NEXT:    v_mov_b32_e32 v1, v0
489; VERDE-NEXT:    v_mov_b32_e32 v2, v0
490; VERDE-NEXT:    v_mov_b32_e32 v3, v0
491; VERDE-NEXT:    v_mov_b32_e32 v4, v0
492; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
493; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
494; VERDE-NEXT:    s_mov_b32 s15, 0xf000
495; VERDE-NEXT:    s_mov_b32 s14, -1
496; VERDE-NEXT:    s_waitcnt vmcnt(0)
497; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
498; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
499; VERDE-NEXT:    ; return to shader part epilog
500;
501; GFX6789-LABEL: sample_1d_lwe:
502; GFX6789:       ; %bb.0: ; %main_body
503; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
504; GFX6789-NEXT:    s_wqm_b64 exec, exec
505; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
506; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
507; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
508; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
509; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
510; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
511; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
512; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
513; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
514; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
515; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
516; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
517; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
518; GFX6789-NEXT:    s_waitcnt vmcnt(0)
519; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
520; GFX6789-NEXT:    s_waitcnt vmcnt(0)
521; GFX6789-NEXT:    ; return to shader part epilog
522;
523; GFX10-LABEL: sample_1d_lwe:
524; GFX10:       ; %bb.0: ; %main_body
525; GFX10-NEXT:    s_mov_b32 s14, exec_lo
526; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
527; GFX10-NEXT:    v_mov_b32_e32 v6, 0
528; GFX10-NEXT:    v_mov_b32_e32 v5, v0
529; GFX10-NEXT:    v_mov_b32_e32 v7, v6
530; GFX10-NEXT:    v_mov_b32_e32 v8, v6
531; GFX10-NEXT:    v_mov_b32_e32 v9, v6
532; GFX10-NEXT:    v_mov_b32_e32 v10, v6
533; GFX10-NEXT:    v_mov_b32_e32 v0, v6
534; GFX10-NEXT:    v_mov_b32_e32 v1, v7
535; GFX10-NEXT:    v_mov_b32_e32 v2, v8
536; GFX10-NEXT:    v_mov_b32_e32 v3, v9
537; GFX10-NEXT:    v_mov_b32_e32 v4, v10
538; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
539; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
540; GFX10-NEXT:    s_waitcnt vmcnt(0)
541; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
542; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
543; GFX10-NEXT:    ; return to shader part epilog
544;
545; GFX11-LABEL: sample_1d_lwe:
546; GFX11:       ; %bb.0: ; %main_body
547; GFX11-NEXT:    s_mov_b32 s14, exec_lo
548; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
549; GFX11-NEXT:    v_mov_b32_e32 v6, 0
550; GFX11-NEXT:    v_mov_b32_e32 v5, v0
551; GFX11-NEXT:    v_mov_b32_e32 v7, v6
552; GFX11-NEXT:    v_mov_b32_e32 v8, v6
553; GFX11-NEXT:    v_mov_b32_e32 v9, v6
554; GFX11-NEXT:    v_mov_b32_e32 v10, v6
555; GFX11-NEXT:    v_mov_b32_e32 v0, v6
556; GFX11-NEXT:    v_mov_b32_e32 v1, v7
557; GFX11-NEXT:    v_mov_b32_e32 v2, v8
558; GFX11-NEXT:    v_mov_b32_e32 v3, v9
559; GFX11-NEXT:    v_mov_b32_e32 v4, v10
560; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
561; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
562; GFX11-NEXT:    s_waitcnt vmcnt(0)
563; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
564; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
565; GFX11-NEXT:    ; return to shader part epilog
566main_body:
567  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
568  %v.vec = extractvalue {<4 x float>, i32} %v, 0
569  %v.err = extractvalue {<4 x float>, i32} %v, 1
570  store i32 %v.err, i32 addrspace(1)* %out, align 4
571  ret <4 x float> %v.vec
572}
573
; 2D sample: calls llvm.amdgcn.image.sample.2d with two float coordinates
; (%s, %t) and a dmask of 15. The checks expect the coordinates to be passed
; as the register pair v[0:1] and the result written to v[0:3]; the
; GFX10PLUS checks additionally expect dim:SQ_RSRC_IMG_2D.
574define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
575; VERDE-LABEL: sample_2d:
576; VERDE:       ; %bb.0: ; %main_body
577; VERDE-NEXT:    s_mov_b64 s[12:13], exec
578; VERDE-NEXT:    s_wqm_b64 exec, exec
579; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
580; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
581; VERDE-NEXT:    s_waitcnt vmcnt(0)
582; VERDE-NEXT:    ; return to shader part epilog
583;
584; GFX6789-LABEL: sample_2d:
585; GFX6789:       ; %bb.0: ; %main_body
586; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
587; GFX6789-NEXT:    s_wqm_b64 exec, exec
588; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
589; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
590; GFX6789-NEXT:    s_waitcnt vmcnt(0)
591; GFX6789-NEXT:    ; return to shader part epilog
592;
593; GFX10PLUS-LABEL: sample_2d:
594; GFX10PLUS:       ; %bb.0: ; %main_body
595; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
596; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
597; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
598; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
599; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
600; GFX10PLUS-NEXT:    ; return to shader part epilog
601main_body:
602  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
603  ret <4 x float> %v
604}
605
; 3D sample: calls llvm.amdgcn.image.sample.3d with three float coordinates
; (%s, %t, %r) and a dmask of 15. The checks expect the coordinates to be
; passed as the register triple v[0:2] and the result written to v[0:3];
; the GFX10PLUS checks additionally expect dim:SQ_RSRC_IMG_3D.
606define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
607; VERDE-LABEL: sample_3d:
608; VERDE:       ; %bb.0: ; %main_body
609; VERDE-NEXT:    s_mov_b64 s[12:13], exec
610; VERDE-NEXT:    s_wqm_b64 exec, exec
611; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
612; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
613; VERDE-NEXT:    s_waitcnt vmcnt(0)
614; VERDE-NEXT:    ; return to shader part epilog
615;
616; GFX6789-LABEL: sample_3d:
617; GFX6789:       ; %bb.0: ; %main_body
618; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
619; GFX6789-NEXT:    s_wqm_b64 exec, exec
620; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
621; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
622; GFX6789-NEXT:    s_waitcnt vmcnt(0)
623; GFX6789-NEXT:    ; return to shader part epilog
624;
625; GFX10PLUS-LABEL: sample_3d:
626; GFX10PLUS:       ; %bb.0: ; %main_body
627; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
628; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
629; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
630; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
631; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
632; GFX10PLUS-NEXT:    ; return to shader part epilog
633main_body:
634  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
635  ret <4 x float> %v
636}
637
; Cube sample: calls llvm.amdgcn.image.sample.cube with (%s, %t, %face) and
; a dmask of 15. The checks expect the three coordinates in v[0:2]. The
; VERDE and GFX6789 checks expect the `da` modifier on image_sample, while
; the GFX10PLUS checks expect dim:SQ_RSRC_IMG_CUBE and no `da`.
638define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
639; VERDE-LABEL: sample_cube:
640; VERDE:       ; %bb.0: ; %main_body
641; VERDE-NEXT:    s_mov_b64 s[12:13], exec
642; VERDE-NEXT:    s_wqm_b64 exec, exec
643; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
644; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
645; VERDE-NEXT:    s_waitcnt vmcnt(0)
646; VERDE-NEXT:    ; return to shader part epilog
647;
648; GFX6789-LABEL: sample_cube:
649; GFX6789:       ; %bb.0: ; %main_body
650; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
651; GFX6789-NEXT:    s_wqm_b64 exec, exec
652; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
653; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
654; GFX6789-NEXT:    s_waitcnt vmcnt(0)
655; GFX6789-NEXT:    ; return to shader part epilog
656;
657; GFX10PLUS-LABEL: sample_cube:
658; GFX10PLUS:       ; %bb.0: ; %main_body
659; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
660; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
661; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
662; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
663; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
664; GFX10PLUS-NEXT:    ; return to shader part epilog
665main_body:
666  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
667  ret <4 x float> %v
668}
669
; image.sample on a 1D array image: two float coordinates (%s, %slice), address
; operand v[0:1]. VERDE/GFX6789 expect the `da` modifier; GFX10PLUS expects
; dim:SQ_RSRC_IMG_1D_ARRAY. Every target expects the s_wqm exec sequence first.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; VERDE-LABEL: sample_1darray:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_1darray:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
701
; image.sample on a 2D array image: three float coordinates (%s, %t, %slice),
; address operand v[0:2]. VERDE/GFX6789 expect the `da` modifier; GFX10PLUS
; expects dim:SQ_RSRC_IMG_2D_ARRAY. Every target expects the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_2darray:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_2darray:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
733
; image.sample.c on a 1D image: %zcompare precedes the coordinate %s, giving the
; two-register address operand v[0:1]. Expected opcode is image_sample_c, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
765
; image.sample.c on a 2D image: %zcompare precedes the coordinates (%s, %t),
; giving the three-register address operand v[0:2]. Expected opcode is
; image_sample_c, with dim:SQ_RSRC_IMG_2D on GFX10PLUS, after the s_wqm sequence.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
797
; image.sample.cl on a 1D image: coordinate %s followed by %clamp, giving the
; two-register address operand v[0:1]. Expected opcode is image_sample_cl, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
; VERDE-LABEL: sample_cl_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cl_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
829
; image.sample.cl on a 2D image: coordinates (%s, %t) followed by %clamp, giving
; the three-register address operand v[0:2]. Expected opcode is image_sample_cl,
; with dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cl_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cl_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
861
; image.sample.c.cl on a 1D image: %zcompare, %s, %clamp form the three-register
; address operand v[0:2]. Expected opcode is image_sample_c_cl, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_cl_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_cl_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
893
; image.sample.c.cl on a 2D image: %zcompare, %s, %t, %clamp form the
; four-register address operand v[0:3]. Expected opcode is image_sample_c_cl,
; with dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_cl_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_cl_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
925
; image.sample.b on a 1D image: %bias precedes the coordinate %s, giving the
; two-register address operand v[0:1]. Expected opcode is image_sample_b, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; VERDE-LABEL: sample_b_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
957
; image.sample.b on a 2D image: %bias precedes the coordinates (%s, %t), giving
; the three-register address operand v[0:2]. Expected opcode is image_sample_b,
; with dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; VERDE-LABEL: sample_b_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
989
; image.sample.c.b on a 1D image: %bias, %zcompare, %s form the three-register
; address operand v[0:2]. Expected opcode is image_sample_c_b, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_b_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1021
; image.sample.c.b on a 2D image: %bias, %zcompare, %s, %t form the
; four-register address operand v[0:3]. Expected opcode is image_sample_c_b,
; with dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_b_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1053
; image.sample.b.cl on a 1D image: %bias, %s, %clamp form the three-register
; address operand v[0:2]. Expected opcode is image_sample_b_cl, with
; dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
; VERDE-LABEL: sample_b_cl_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_cl_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1085
; image.sample.b.cl on a 2D image: %bias, %s, %t, %clamp form the four-register
; address operand v[0:3]. Expected opcode is image_sample_b_cl, with
; dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_b_cl_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_cl_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1117
; image.sample.c.b.cl on a 1D image: %bias, %zcompare, %s, %clamp form the
; four-register address operand v[0:3]. Expected opcode is image_sample_c_b_cl,
; with dim:SQ_RSRC_IMG_1D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_cl_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1149
; image.sample.c.b.cl on a 2D image: %bias, %zcompare, %s, %t, %clamp form the
; five-register address operand v[0:4]. Expected opcode is image_sample_c_b_cl,
; with dim:SQ_RSRC_IMG_2D on GFX10PLUS, preceded by the s_wqm exec sequence.
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_cl_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1181
; image.sample.d on a 1D image: %dsdh, %dsdv, %s form the three-register address
; operand v[0:2]. Expected opcode is image_sample_d, with dim:SQ_RSRC_IMG_1D on
; GFX10PLUS. The expected output has no s_wqm exec sequence before the sample.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_d_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1204
; image.sample.d on a 2D image: %dsdh, %dtdh, %dsdv, %dtdv, %s, %t form the
; six-register address operand v[0:5]. Expected opcode is image_sample_d, with
; dim:SQ_RSRC_IMG_2D on GFX10PLUS. No s_wqm exec sequence is expected.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_d_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1227
; image.sample.c.d on a 1D image: %zcompare, %dsdh, %dsdv, %s form the
; four-register address operand v[0:3]. Expected opcode is image_sample_c_d,
; with dim:SQ_RSRC_IMG_1D on GFX10PLUS. No s_wqm exec sequence is expected.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_d_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1250
; image.sample.c.d on a 2D image: %zcompare plus four derivatives and (%s, %t)
; form the seven-register address operand v[0:6]. Expected opcode is
; image_sample_c_d, with dim:SQ_RSRC_IMG_2D on GFX10PLUS. No s_wqm is expected.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_d_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1273
; image.sample.d.cl on a 1D image: %dsdh, %dsdv, %s, %clamp form the
; four-register address operand v[0:3]. Expected opcode is image_sample_d_cl,
; with dim:SQ_RSRC_IMG_1D on GFX10PLUS. No s_wqm exec sequence is expected.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_d_cl_1d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_1d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_cl_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1296
; image.sample.d.cl on a 2D image: four derivatives, (%s, %t) and %clamp form
; the seven-register address operand v[0:6]. Expected opcode is
; image_sample_d_cl, with dim:SQ_RSRC_IMG_2D on GFX10PLUS. No s_wqm is expected.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_d_cl_2d:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_2d:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_cl_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
1319
; Test: llvm.amdgcn.image.sample.c.d.cl.1d returning <4 x float>.
; Same shape as the sample.d.cl.1d test but with a leading %zcompare operand,
; giving five float address operands; the expected image_sample_c_d_cl reads
; v[0:4]. No exec/WQM setup is expected; the gfx10+ checks expect
; dim:SQ_RSRC_IMG_1D.
1320define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
1321; VERDE-LABEL: sample_c_d_cl_1d:
1322; VERDE:       ; %bb.0: ; %main_body
1323; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1324; VERDE-NEXT:    s_waitcnt vmcnt(0)
1325; VERDE-NEXT:    ; return to shader part epilog
1326;
1327; GFX6789-LABEL: sample_c_d_cl_1d:
1328; GFX6789:       ; %bb.0: ; %main_body
1329; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1330; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1331; GFX6789-NEXT:    ; return to shader part epilog
1332;
1333; GFX10PLUS-LABEL: sample_c_d_cl_1d:
1334; GFX10PLUS:       ; %bb.0: ; %main_body
1335; GFX10PLUS-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1336; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1337; GFX10PLUS-NEXT:    ; return to shader part epilog
1338main_body:
1339  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1340  ret <4 x float> %v
1341}
1342
; Test: llvm.amdgcn.image.sample.c.d.cl.2d returning <4 x float>.
; Eight float address operands are passed (%zcompare, four derivatives, %s,
; %t, %clamp), and the expected image_sample_c_d_cl reads v[0:7]. No exec/WQM
; setup is expected; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D.
1343define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1344; VERDE-LABEL: sample_c_d_cl_2d:
1345; VERDE:       ; %bb.0: ; %main_body
1346; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1347; VERDE-NEXT:    s_waitcnt vmcnt(0)
1348; VERDE-NEXT:    ; return to shader part epilog
1349;
1350; GFX6789-LABEL: sample_c_d_cl_2d:
1351; GFX6789:       ; %bb.0: ; %main_body
1352; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1353; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1354; GFX6789-NEXT:    ; return to shader part epilog
1355;
1356; GFX10PLUS-LABEL: sample_c_d_cl_2d:
1357; GFX10PLUS:       ; %bb.0: ; %main_body
1358; GFX10PLUS-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1359; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1360; GFX10PLUS-NEXT:    ; return to shader part epilog
1361main_body:
1362  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1363  ret <4 x float> %v
1364}
1365
; Test: llvm.amdgcn.image.sample.l.1d returning <4 x float>.
; Two float address operands (%s, %lod) are passed and the expected
; image_sample_l reads v[0:1]. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_1D.
1366define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
1367; VERDE-LABEL: sample_l_1d:
1368; VERDE:       ; %bb.0: ; %main_body
1369; VERDE-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1370; VERDE-NEXT:    s_waitcnt vmcnt(0)
1371; VERDE-NEXT:    ; return to shader part epilog
1372;
1373; GFX6789-LABEL: sample_l_1d:
1374; GFX6789:       ; %bb.0: ; %main_body
1375; GFX6789-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1376; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1377; GFX6789-NEXT:    ; return to shader part epilog
1378;
1379; GFX10PLUS-LABEL: sample_l_1d:
1380; GFX10PLUS:       ; %bb.0: ; %main_body
1381; GFX10PLUS-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1382; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1383; GFX10PLUS-NEXT:    ; return to shader part epilog
1384main_body:
1385  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1386  ret <4 x float> %v
1387}
1388
; Test: llvm.amdgcn.image.sample.l.2d returning <4 x float>.
; Three float address operands (%s, %t, %lod) are passed and the expected
; image_sample_l reads v[0:2]. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D.
1389define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
1390; VERDE-LABEL: sample_l_2d:
1391; VERDE:       ; %bb.0: ; %main_body
1392; VERDE-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1393; VERDE-NEXT:    s_waitcnt vmcnt(0)
1394; VERDE-NEXT:    ; return to shader part epilog
1395;
1396; GFX6789-LABEL: sample_l_2d:
1397; GFX6789:       ; %bb.0: ; %main_body
1398; GFX6789-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1399; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1400; GFX6789-NEXT:    ; return to shader part epilog
1401;
1402; GFX10PLUS-LABEL: sample_l_2d:
1403; GFX10PLUS:       ; %bb.0: ; %main_body
1404; GFX10PLUS-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1405; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1406; GFX10PLUS-NEXT:    ; return to shader part epilog
1407main_body:
1408  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1409  ret <4 x float> %v
1410}
1411
; Test: llvm.amdgcn.image.sample.c.l.1d returning <4 x float>.
; Three float address operands (%zcompare, %s, %lod) are passed and the
; expected image_sample_c_l reads v[0:2]. No exec/WQM setup is expected on any
; target; the gfx10+ checks expect dim:SQ_RSRC_IMG_1D.
1412define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
1413; VERDE-LABEL: sample_c_l_1d:
1414; VERDE:       ; %bb.0: ; %main_body
1415; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1416; VERDE-NEXT:    s_waitcnt vmcnt(0)
1417; VERDE-NEXT:    ; return to shader part epilog
1418;
1419; GFX6789-LABEL: sample_c_l_1d:
1420; GFX6789:       ; %bb.0: ; %main_body
1421; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1422; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1423; GFX6789-NEXT:    ; return to shader part epilog
1424;
1425; GFX10PLUS-LABEL: sample_c_l_1d:
1426; GFX10PLUS:       ; %bb.0: ; %main_body
1427; GFX10PLUS-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1428; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1429; GFX10PLUS-NEXT:    ; return to shader part epilog
1430main_body:
1431  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1432  ret <4 x float> %v
1433}
1434
; Test: llvm.amdgcn.image.sample.c.l.2d returning <4 x float>.
; Four float address operands (%zcompare, %s, %t, %lod) are passed and the
; expected image_sample_c_l reads v[0:3]. No exec/WQM setup is expected on any
; target; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D.
1435define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
1436; VERDE-LABEL: sample_c_l_2d:
1437; VERDE:       ; %bb.0: ; %main_body
1438; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1439; VERDE-NEXT:    s_waitcnt vmcnt(0)
1440; VERDE-NEXT:    ; return to shader part epilog
1441;
1442; GFX6789-LABEL: sample_c_l_2d:
1443; GFX6789:       ; %bb.0: ; %main_body
1444; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1445; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1446; GFX6789-NEXT:    ; return to shader part epilog
1447;
1448; GFX10PLUS-LABEL: sample_c_l_2d:
1449; GFX10PLUS:       ; %bb.0: ; %main_body
1450; GFX10PLUS-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1451; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1452; GFX10PLUS-NEXT:    ; return to shader part epilog
1453main_body:
1454  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1455  ret <4 x float> %v
1456}
1457
; Test: llvm.amdgcn.image.sample.lz.1d returning <4 x float>.
; A single float address operand (%s) is passed and the expected
; image_sample_lz reads just v0. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_1D.
1458define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1459; VERDE-LABEL: sample_lz_1d:
1460; VERDE:       ; %bb.0: ; %main_body
1461; VERDE-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1462; VERDE-NEXT:    s_waitcnt vmcnt(0)
1463; VERDE-NEXT:    ; return to shader part epilog
1464;
1465; GFX6789-LABEL: sample_lz_1d:
1466; GFX6789:       ; %bb.0: ; %main_body
1467; GFX6789-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1468; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1469; GFX6789-NEXT:    ; return to shader part epilog
1470;
1471; GFX10PLUS-LABEL: sample_lz_1d:
1472; GFX10PLUS:       ; %bb.0: ; %main_body
1473; GFX10PLUS-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1474; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1475; GFX10PLUS-NEXT:    ; return to shader part epilog
1476main_body:
1477  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1478  ret <4 x float> %v
1479}
1480
; Test: llvm.amdgcn.image.sample.lz.2d returning <4 x float>.
; Two float address operands (%s, %t) are passed and the expected
; image_sample_lz reads v[0:1]. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D.
1481define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1482; VERDE-LABEL: sample_lz_2d:
1483; VERDE:       ; %bb.0: ; %main_body
1484; VERDE-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1485; VERDE-NEXT:    s_waitcnt vmcnt(0)
1486; VERDE-NEXT:    ; return to shader part epilog
1487;
1488; GFX6789-LABEL: sample_lz_2d:
1489; GFX6789:       ; %bb.0: ; %main_body
1490; GFX6789-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1491; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1492; GFX6789-NEXT:    ; return to shader part epilog
1493;
1494; GFX10PLUS-LABEL: sample_lz_2d:
1495; GFX10PLUS:       ; %bb.0: ; %main_body
1496; GFX10PLUS-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1497; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1498; GFX10PLUS-NEXT:    ; return to shader part epilog
1499main_body:
1500  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1501  ret <4 x float> %v
1502}
1503
; Test: llvm.amdgcn.image.sample.c.lz.1d returning <4 x float>.
; Two float address operands (%zcompare, %s) are passed and the expected
; image_sample_c_lz reads v[0:1]. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_1D.
1504define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
1505; VERDE-LABEL: sample_c_lz_1d:
1506; VERDE:       ; %bb.0: ; %main_body
1507; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1508; VERDE-NEXT:    s_waitcnt vmcnt(0)
1509; VERDE-NEXT:    ; return to shader part epilog
1510;
1511; GFX6789-LABEL: sample_c_lz_1d:
1512; GFX6789:       ; %bb.0: ; %main_body
1513; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1514; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1515; GFX6789-NEXT:    ; return to shader part epilog
1516;
1517; GFX10PLUS-LABEL: sample_c_lz_1d:
1518; GFX10PLUS:       ; %bb.0: ; %main_body
1519; GFX10PLUS-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1520; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1521; GFX10PLUS-NEXT:    ; return to shader part epilog
1522main_body:
1523  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1524  ret <4 x float> %v
1525}
1526
; Test: llvm.amdgcn.image.sample.c.lz.2d returning <4 x float>.
; Three float address operands (%zcompare, %s, %t) are passed and the expected
; image_sample_c_lz reads v[0:2]. No exec/WQM setup is expected on any target;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D.
1527define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
1528; VERDE-LABEL: sample_c_lz_2d:
1529; VERDE:       ; %bb.0: ; %main_body
1530; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1531; VERDE-NEXT:    s_waitcnt vmcnt(0)
1532; VERDE-NEXT:    ; return to shader part epilog
1533;
1534; GFX6789-LABEL: sample_c_lz_2d:
1535; GFX6789:       ; %bb.0: ; %main_body
1536; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1537; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1538; GFX6789-NEXT:    ; return to shader part epilog
1539;
1540; GFX10PLUS-LABEL: sample_c_lz_2d:
1541; GFX10PLUS:       ; %bb.0: ; %main_body
1542; GFX10PLUS-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1543; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1544; GFX10PLUS-NEXT:    ; return to shader part epilog
1545main_body:
1546  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1547  ret <4 x float> %v
1548}
1549
; Test: llvm.amdgcn.image.sample.c.d.o.2darray with a scalar float result.
; The first operand is i32 4 (printed as dmask:0x4), so the expected
; instruction writes a single register, v0. Nine address operands are passed
; (i32 %offset plus eight floats), while the expected address range is the
; sixteen-register v[0:15]. The verde and gfx9 checks expect the "da" modifier;
; the gfx10+ checks expect dim:SQ_RSRC_IMG_2D_ARRAY instead.
1550define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1551; VERDE-LABEL: sample_c_d_o_2darray_V1:
1552; VERDE:       ; %bb.0: ; %main_body
1553; VERDE-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
1554; VERDE-NEXT:    s_waitcnt vmcnt(0)
1555; VERDE-NEXT:    ; return to shader part epilog
1556;
1557; GFX6789-LABEL: sample_c_d_o_2darray_V1:
1558; GFX6789:       ; %bb.0: ; %main_body
1559; GFX6789-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
1560; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1561; GFX6789-NEXT:    ; return to shader part epilog
1562;
1563; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
1564; GFX10PLUS:       ; %bb.0: ; %main_body
1565; GFX10PLUS-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
1566; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1567; GFX10PLUS-NEXT:    ; return to shader part epilog
1568main_body:
1569  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1570  ret float %v
1571}
1572
; Test: the {float, i32} form of llvm.amdgcn.image.sample.c.d.o.2darray.
; The call passes i32 1 as its second-to-last operand and the expected
; instruction carries the "tfe" modifier with a two-register destination
; v[9:10] that is zero-initialised beforehand. The float half is returned in
; v0 and the i32 half is stored to %out. gfx10 and gfx11 have separate check
; blocks here because the expected store mnemonic differs (global_store_dword
; vs. global_store_b32).
1573define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
1574; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
1575; VERDE:       ; %bb.0: ; %main_body
1576; VERDE-NEXT:    v_mov_b32_e32 v9, 0
1577; VERDE-NEXT:    v_mov_b32_e32 v10, v9
1578; VERDE-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
1579; VERDE-NEXT:    s_mov_b32 s15, 0xf000
1580; VERDE-NEXT:    s_mov_b32 s14, -1
1581; VERDE-NEXT:    s_waitcnt vmcnt(0)
1582; VERDE-NEXT:    v_mov_b32_e32 v0, v9
1583; VERDE-NEXT:    buffer_store_dword v10, off, s[12:15], 0
1584; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1585; VERDE-NEXT:    ; return to shader part epilog
1586;
1587; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
1588; GFX6789:       ; %bb.0: ; %main_body
1589; GFX6789-NEXT:    v_mov_b32_e32 v11, 0
1590; GFX6789-NEXT:    v_mov_b32_e32 v12, v11
1591; GFX6789-NEXT:    v_mov_b32_e32 v9, v11
1592; GFX6789-NEXT:    v_mov_b32_e32 v10, v12
1593; GFX6789-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
1594; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1595; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
1596; GFX6789-NEXT:    global_store_dword v11, v10, s[12:13]
1597; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1598; GFX6789-NEXT:    ; return to shader part epilog
1599;
1600; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
1601; GFX10:       ; %bb.0: ; %main_body
1602; GFX10-NEXT:    v_mov_b32_e32 v11, 0
1603; GFX10-NEXT:    v_mov_b32_e32 v12, v11
1604; GFX10-NEXT:    v_mov_b32_e32 v9, v11
1605; GFX10-NEXT:    v_mov_b32_e32 v10, v12
1606; GFX10-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1607; GFX10-NEXT:    s_waitcnt vmcnt(0)
1608; GFX10-NEXT:    v_mov_b32_e32 v0, v9
1609; GFX10-NEXT:    global_store_dword v11, v10, s[12:13]
1610; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1611; GFX10-NEXT:    ; return to shader part epilog
1612;
1613; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
1614; GFX11:       ; %bb.0: ; %main_body
1615; GFX11-NEXT:    v_mov_b32_e32 v11, 0
1616; GFX11-NEXT:    v_mov_b32_e32 v12, v11
1617; GFX11-NEXT:    v_mov_b32_e32 v9, v11
1618; GFX11-NEXT:    v_mov_b32_e32 v10, v12
1619; GFX11-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1620; GFX11-NEXT:    s_waitcnt vmcnt(0)
1621; GFX11-NEXT:    v_mov_b32_e32 v0, v9
1622; GFX11-NEXT:    global_store_b32 v11, v10, s[12:13]
1623; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1624; GFX11-NEXT:    ; return to shader part epilog
1625main_body:
1626  %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  ; Split the struct result: field 0 is the sampled float, field 1 the i32.
1627  %v.vec = extractvalue {float, i32} %v, 0
1628  %v.err = extractvalue {float, i32} %v, 1
  ; The i32 field is written to %out; only the float is returned.
1629  store i32 %v.err, i32 addrspace(1)* %out, align 4
1630  ret float %v.vec
1631}
1632
; Test: llvm.amdgcn.image.sample.c.d.o.2darray with a <2 x float> result.
; The first operand is i32 6 (printed as dmask:0x6) and the expected
; destination is the two-register range v[0:1]. The address operands and the
; expected v[0:15] address range match the scalar-result variant of this test.
; The verde and gfx9 checks expect "da"; the gfx10+ checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY.
1633define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1634; VERDE-LABEL: sample_c_d_o_2darray_V2:
1635; VERDE:       ; %bb.0: ; %main_body
1636; VERDE-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
1637; VERDE-NEXT:    s_waitcnt vmcnt(0)
1638; VERDE-NEXT:    ; return to shader part epilog
1639;
1640; GFX6789-LABEL: sample_c_d_o_2darray_V2:
1641; GFX6789:       ; %bb.0: ; %main_body
1642; GFX6789-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
1643; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1644; GFX6789-NEXT:    ; return to shader part epilog
1645;
1646; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
1647; GFX10PLUS:       ; %bb.0: ; %main_body
1648; GFX10PLUS-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
1649; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1650; GFX10PLUS-NEXT:    ; return to shader part epilog
1651main_body:
1652  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1653  ret <2 x float> %v
1654}
1655
; Test: the {<2 x float>, i32} form of llvm.amdgcn.image.sample.c.d.o.2darray.
; The call passes i32 1 as its second-to-last operand and the expected
; instruction carries "tfe" with a three-register destination v[9:11] that is
; zero-initialised beforehand. Rather than storing the i32, this test bitcasts
; it to float and returns it in lane 2 of a <4 x float> alongside the two
; sampled floats (lane 3 is left undef), so the expected code copies v9..v11
; into v0..v2.
1656define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1657; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
1658; VERDE:       ; %bb.0: ; %main_body
1659; VERDE-NEXT:    v_mov_b32_e32 v9, 0
1660; VERDE-NEXT:    v_mov_b32_e32 v10, v9
1661; VERDE-NEXT:    v_mov_b32_e32 v11, v9
1662; VERDE-NEXT:    image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
1663; VERDE-NEXT:    s_waitcnt vmcnt(0)
1664; VERDE-NEXT:    v_mov_b32_e32 v0, v9
1665; VERDE-NEXT:    v_mov_b32_e32 v1, v10
1666; VERDE-NEXT:    v_mov_b32_e32 v2, v11
1667; VERDE-NEXT:    ; return to shader part epilog
1668;
1669; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
1670; GFX6789:       ; %bb.0: ; %main_body
1671; GFX6789-NEXT:    v_mov_b32_e32 v9, 0
1672; GFX6789-NEXT:    v_mov_b32_e32 v10, v9
1673; GFX6789-NEXT:    v_mov_b32_e32 v11, v9
1674; GFX6789-NEXT:    image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
1675; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1676; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
1677; GFX6789-NEXT:    v_mov_b32_e32 v1, v10
1678; GFX6789-NEXT:    v_mov_b32_e32 v2, v11
1679; GFX6789-NEXT:    ; return to shader part epilog
1680;
1681; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2_tfe:
1682; GFX10PLUS:       ; %bb.0: ; %main_body
1683; GFX10PLUS-NEXT:    v_mov_b32_e32 v9, 0
1684; GFX10PLUS-NEXT:    v_mov_b32_e32 v10, v9
1685; GFX10PLUS-NEXT:    v_mov_b32_e32 v11, v9
1686; GFX10PLUS-NEXT:    image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
1687; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1688; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v9
1689; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v10
1690; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v11
1691; GFX10PLUS-NEXT:    ; return to shader part epilog
1692main_body:
1693  %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  ; Unpack the two sampled floats from field 0 of the struct result.
1694  %v.vec = extractvalue {<2 x float>, i32} %v, 0
1695  %v.f1 = extractelement <2 x float> %v.vec, i32 0
1696  %v.f2 = extractelement <2 x float> %v.vec, i32 1
  ; Field 1 is the i32; reinterpret it as float so it can share the return vector.
1697  %v.err = extractvalue {<2 x float>, i32} %v, 1
1698  %v.errf = bitcast i32 %v.err to float
  ; Build the return value: lanes 0-1 = sampled floats, lane 2 = bitcast i32.
1699  %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
1700  %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
1701  %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
1702  ret <4 x float> %res.2
1703}
1704
; Test: llvm.amdgcn.image.sample.1d called with its i1 operand set to 1.
; The expected image_sample carries the "unorm" modifier on every target.
; Unlike the explicit-derivative/LOD tests in this file, the expected code
; first saves exec, enables whole-quad mode (s_wqm) and then ANDs exec with
; the saved mask before the sample.
1705define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1706; VERDE-LABEL: sample_1d_unorm:
1707; VERDE:       ; %bb.0: ; %main_body
1708; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1709; VERDE-NEXT:    s_wqm_b64 exec, exec
1710; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1711; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1712; VERDE-NEXT:    s_waitcnt vmcnt(0)
1713; VERDE-NEXT:    ; return to shader part epilog
1714;
1715; GFX6789-LABEL: sample_1d_unorm:
1716; GFX6789:       ; %bb.0: ; %main_body
1717; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1718; GFX6789-NEXT:    s_wqm_b64 exec, exec
1719; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1720; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
1721; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1722; GFX6789-NEXT:    ; return to shader part epilog
1723;
1724; GFX10PLUS-LABEL: sample_1d_unorm:
1725; GFX10PLUS:       ; %bb.0: ; %main_body
1726; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1727; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1728; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1729; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
1730; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1731; GFX10PLUS-NEXT:    ; return to shader part epilog
1732main_body:
1733  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
1734  ret <4 x float> %v
1735}
1736
; Test: llvm.amdgcn.image.sample.1d called with its final i32 operand set to 1.
; The expected image_sample carries the "glc" modifier on every target. The
; expected code also contains the exec save / s_wqm / s_and sequence before
; the sample.
1737define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1738; VERDE-LABEL: sample_1d_glc:
1739; VERDE:       ; %bb.0: ; %main_body
1740; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1741; VERDE-NEXT:    s_wqm_b64 exec, exec
1742; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1743; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1744; VERDE-NEXT:    s_waitcnt vmcnt(0)
1745; VERDE-NEXT:    ; return to shader part epilog
1746;
1747; GFX6789-LABEL: sample_1d_glc:
1748; GFX6789:       ; %bb.0: ; %main_body
1749; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1750; GFX6789-NEXT:    s_wqm_b64 exec, exec
1751; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1752; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
1753; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1754; GFX6789-NEXT:    ; return to shader part epilog
1755;
1756; GFX10PLUS-LABEL: sample_1d_glc:
1757; GFX10PLUS:       ; %bb.0: ; %main_body
1758; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1759; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1760; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1761; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
1762; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1763; GFX10PLUS-NEXT:    ; return to shader part epilog
1764main_body:
1765  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
1766  ret <4 x float> %v
1767}
1768
; Test: llvm.amdgcn.image.sample.1d called with its final i32 operand set to 2.
; The expected image_sample carries the "slc" modifier on every target. The
; expected code also contains the exec save / s_wqm / s_and sequence before
; the sample.
1769define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1770; VERDE-LABEL: sample_1d_slc:
1771; VERDE:       ; %bb.0: ; %main_body
1772; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1773; VERDE-NEXT:    s_wqm_b64 exec, exec
1774; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1775; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1776; VERDE-NEXT:    s_waitcnt vmcnt(0)
1777; VERDE-NEXT:    ; return to shader part epilog
1778;
1779; GFX6789-LABEL: sample_1d_slc:
1780; GFX6789:       ; %bb.0: ; %main_body
1781; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1782; GFX6789-NEXT:    s_wqm_b64 exec, exec
1783; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1784; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
1785; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1786; GFX6789-NEXT:    ; return to shader part epilog
1787;
1788; GFX10PLUS-LABEL: sample_1d_slc:
1789; GFX10PLUS:       ; %bb.0: ; %main_body
1790; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1791; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1792; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1793; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
1794; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1795; GFX10PLUS-NEXT:    ; return to shader part epilog
1796main_body:
1797  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
1798  ret <4 x float> %v
1799}
1800
; Test: llvm.amdgcn.image.sample.1d called with its final i32 operand set to 3.
; The expected image_sample carries both the "glc" and "slc" modifiers, in
; that order, on every target. The expected code also contains the exec save /
; s_wqm / s_and sequence before the sample.
1801define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1802; VERDE-LABEL: sample_1d_glc_slc:
1803; VERDE:       ; %bb.0: ; %main_body
1804; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1805; VERDE-NEXT:    s_wqm_b64 exec, exec
1806; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1807; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1808; VERDE-NEXT:    s_waitcnt vmcnt(0)
1809; VERDE-NEXT:    ; return to shader part epilog
1810;
1811; GFX6789-LABEL: sample_1d_glc_slc:
1812; GFX6789:       ; %bb.0: ; %main_body
1813; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1814; GFX6789-NEXT:    s_wqm_b64 exec, exec
1815; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1816; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
1817; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1818; GFX6789-NEXT:    ; return to shader part epilog
1819;
1820; GFX10PLUS-LABEL: sample_1d_glc_slc:
1821; GFX10PLUS:       ; %bb.0: ; %main_body
1822; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1823; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1824; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1825; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
1826; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1827; GFX10PLUS-NEXT:    ; return to shader part epilog
1828main_body:
1829  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
1830  ret <4 x float> %v
1831}
1832
; Test: dmask narrowing when only element 0 of the sample result is used.
; The IR call requests all four channels (i32 15) but only element 0 is
; extracted and returned; the expected image_sample has dmask:0x1 and a
; single-register destination v0 on every target.
1833define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1834; VERDE-LABEL: adjust_writemask_sample_0:
1835; VERDE:       ; %bb.0: ; %main_body
1836; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1837; VERDE-NEXT:    s_wqm_b64 exec, exec
1838; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1839; VERDE-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1840; VERDE-NEXT:    s_waitcnt vmcnt(0)
1841; VERDE-NEXT:    ; return to shader part epilog
1842;
1843; GFX6789-LABEL: adjust_writemask_sample_0:
1844; GFX6789:       ; %bb.0: ; %main_body
1845; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1846; GFX6789-NEXT:    s_wqm_b64 exec, exec
1847; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1848; GFX6789-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
1849; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1850; GFX6789-NEXT:    ; return to shader part epilog
1851;
1852; GFX10PLUS-LABEL: adjust_writemask_sample_0:
1853; GFX10PLUS:       ; %bb.0: ; %main_body
1854; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1855; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1856; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1857; GFX10PLUS-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
1858; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1859; GFX10PLUS-NEXT:    ; return to shader part epilog
1860main_body:
1861  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1862  %elt0 = extractelement <4 x float> %r, i32 0
1863  ret float %elt0
1864}
1865
; Test: dmask narrowing when only elements 0 and 1 of the sample result are
; used. The IR call requests all four channels (i32 15) but the shufflevector
; keeps lanes <0, 1>; the expected image_sample has dmask:0x3 and a
; two-register destination v[0:1] on every target.
1866define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1867; VERDE-LABEL: adjust_writemask_sample_01:
1868; VERDE:       ; %bb.0: ; %main_body
1869; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1870; VERDE-NEXT:    s_wqm_b64 exec, exec
1871; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1872; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1873; VERDE-NEXT:    s_waitcnt vmcnt(0)
1874; VERDE-NEXT:    ; return to shader part epilog
1875;
1876; GFX6789-LABEL: adjust_writemask_sample_01:
1877; GFX6789:       ; %bb.0: ; %main_body
1878; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1879; GFX6789-NEXT:    s_wqm_b64 exec, exec
1880; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1881; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
1882; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1883; GFX6789-NEXT:    ; return to shader part epilog
1884;
1885; GFX10PLUS-LABEL: adjust_writemask_sample_01:
1886; GFX10PLUS:       ; %bb.0: ; %main_body
1887; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1888; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1889; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1890; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
1891; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1892; GFX10PLUS-NEXT:    ; return to shader part epilog
1893main_body:
1894  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1895  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1896  ret <2 x float> %out
1897}
1898
; Test: dmask narrowing when only elements 0, 1 and 2 of the sample result are
; used. The IR call requests all four channels (i32 15) but the shufflevector
; keeps lanes <0, 1, 2>; the expected image_sample has dmask:0x7 and a
; three-register destination v[0:2] on every target.
1899define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1900; VERDE-LABEL: adjust_writemask_sample_012:
1901; VERDE:       ; %bb.0: ; %main_body
1902; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1903; VERDE-NEXT:    s_wqm_b64 exec, exec
1904; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1905; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1906; VERDE-NEXT:    s_waitcnt vmcnt(0)
1907; VERDE-NEXT:    ; return to shader part epilog
1908;
1909; GFX6789-LABEL: adjust_writemask_sample_012:
1910; GFX6789:       ; %bb.0: ; %main_body
1911; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1912; GFX6789-NEXT:    s_wqm_b64 exec, exec
1913; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1914; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
1915; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1916; GFX6789-NEXT:    ; return to shader part epilog
1917;
1918; GFX10PLUS-LABEL: adjust_writemask_sample_012:
1919; GFX10PLUS:       ; %bb.0: ; %main_body
1920; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1921; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1922; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1923; GFX10PLUS-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
1924; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1925; GFX10PLUS-NEXT:    ; return to shader part epilog
1926main_body:
1927  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1928  %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1929  ret <3 x float> %out
1930}
1931
; Test: dmask narrowing when the used elements do not start at lane 0.
; The IR call requests all four channels (i32 15) but the shufflevector keeps
; lanes <1, 2>; the expected image_sample has dmask:0x6 and still writes a
; compact two-register destination v[0:1] on every target.
1932define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1933; VERDE-LABEL: adjust_writemask_sample_12:
1934; VERDE:       ; %bb.0: ; %main_body
1935; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1936; VERDE-NEXT:    s_wqm_b64 exec, exec
1937; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1938; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1939; VERDE-NEXT:    s_waitcnt vmcnt(0)
1940; VERDE-NEXT:    ; return to shader part epilog
1941;
1942; GFX6789-LABEL: adjust_writemask_sample_12:
1943; GFX6789:       ; %bb.0: ; %main_body
1944; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1945; GFX6789-NEXT:    s_wqm_b64 exec, exec
1946; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1947; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
1948; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1949; GFX6789-NEXT:    ; return to shader part epilog
1950;
1951; GFX10PLUS-LABEL: adjust_writemask_sample_12:
1952; GFX10PLUS:       ; %bb.0: ; %main_body
1953; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1954; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1955; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1956; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
1957; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1958; GFX10PLUS-NEXT:    ; return to shader part epilog
1959main_body:
1960  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1961  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
1962  ret <2 x float> %out
1963}
1964
; Samples with the full dmask (15) and keeps only result elements 0 and 3.
; The assertions expect the emitted image_sample to use dmask:0x9 and a
; two-register destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_03:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_03:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_03:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ; Only the first and last elements of the sampled vector are returned.
  %kept = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %kept
}
1997
; Samples with the full dmask (15) and keeps only result elements 1 and 3.
; The assertions expect the emitted image_sample to use dmask:0xa and a
; two-register destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_13:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_13:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_13:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ; Only the second and fourth elements of the sampled vector are returned.
  %kept = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %kept
}
2030
; Samples with the full dmask (15) and keeps only result elements 1, 2 and 3.
; The assertions expect the emitted image_sample to use dmask:0xe and a
; three-register destination (v[0:2]).
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ; Only the last three elements of the sampled vector are returned.
  %kept = shufflevector <4 x float> %texel, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %kept
}
2063
; Calls the sample intrinsic with a dmask of 0 and returns the result as-is.
; The assertions expect no instructions at all in the function body on any
; of the tested targets (only the epilog marker comment remains).
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %texel
}
2080
; Starts from a partial dmask (14 = 0xe) and keeps only elements 0 and 1 of
; the returned vector. The assertions expect the emitted image_sample to use
; dmask:0x6 and a two-register destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ; Only the first two elements of the sampled vector are returned.
  %kept = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %kept
}
2113
; Starts from a partial dmask (11 = 0xb) and keeps only elements 1 and 2 of
; the returned vector. The assertions expect the emitted image_sample to use
; dmask:0xa and a two-register destination (v[0:1]).
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    s_mov_b64 s[12:13], exec
; VERDE-NEXT:    s_wqm_b64 exec, exec
; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
; GFX6789:       ; %bb.0: ; %main_body
; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
; GFX6789-NEXT:    s_wqm_b64 exec, exec
; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT:    s_waitcnt vmcnt(0)
; GFX6789-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
main_body:
  %texel = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ; Only the second and third elements of the sampled vector are returned.
  %kept = shufflevector <4 x float> %texel, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %kept
}
2146
; Intrinsic declarations, grouped by the variant suffix in the intrinsic name.
; Every declaration ends with the same trailing operand types:
; <8 x i32>, <4 x i32>, i1, i32, i32.

; sample: one declaration per dimension, plus the struct-returning 1d form.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c / sample.cl / sample.c.cl
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.b and its .c / .cl combinations
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.d and its .c / .cl combinations
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.l / sample.c.l
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.lz / sample.c.lz
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c.d.o on 2darray: scalar and two-element results, each with a
; struct-returning form that adds an i32 member.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { float, i32 } @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { <2 x float>, i32 } @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. #1 is the group attached to every declaration above.
; NOTE(review): #0 and #2 are not referenced by the declarations above;
; confirm whether the rest of the file uses them.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
2198