1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
5declare void @extern_func() #2
6
7define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
8; The vgpr tuple8 operand of the image_gather4_c_b_cl instruction does not need to
9; be preserved across the call and should get 8 scratch registers.
; The assertions below expect the address operands in v[32:36] and the gather
; result in v[41:44]; the previous contents of v41-v44 are stored to the stack
; in the prologue and reloaded in the epilogue.
10; GFX9-LABEL: non_preserved_vgpr_tuple8:
11; GFX9:       ; %bb.0: ; %main_body
12; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
14; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
15; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
16; GFX9-NEXT:    s_mov_b32 s4, 0
17; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
18; GFX9-NEXT:    s_mov_b32 s33, s32
19; GFX9-NEXT:    v_mov_b32_e32 v36, v16
20; GFX9-NEXT:    v_mov_b32_e32 v35, v15
21; GFX9-NEXT:    v_mov_b32_e32 v34, v14
22; GFX9-NEXT:    v_mov_b32_e32 v33, v13
23; GFX9-NEXT:    v_mov_b32_e32 v32, v12
24; GFX9-NEXT:    s_mov_b32 s5, s4
25; GFX9-NEXT:    s_mov_b32 s6, s4
26; GFX9-NEXT:    s_mov_b32 s7, s4
27; GFX9-NEXT:    s_mov_b32 s8, s4
28; GFX9-NEXT:    s_mov_b32 s9, s4
29; GFX9-NEXT:    s_mov_b32 s10, s4
30; GFX9-NEXT:    s_mov_b32 s11, s4
31; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
32; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
33; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
34; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
35; GFX9-NEXT:    ;;#ASMSTART
36; GFX9-NEXT:    ;;#ASMEND
37; GFX9-NEXT:    ;;#ASMSTART
38; GFX9-NEXT:    ;;#ASMEND
39; GFX9-NEXT:    ;;#ASMSTART
40; GFX9-NEXT:    ;;#ASMEND
41; GFX9-NEXT:    ;;#ASMSTART
42; GFX9-NEXT:    ;;#ASMEND
43; GFX9-NEXT:    image_gather4_c_b_cl v[41:44], v[32:36], s[4:11], s[4:7] dmask:0x1
44; GFX9-NEXT:    s_addk_i32 s32, 0x800
45; GFX9-NEXT:    s_getpc_b64 s[4:5]
46; GFX9-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
47; GFX9-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
48; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
49; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
50; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
51; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
52; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
53; GFX9-NEXT:    v_mov_b32_e32 v0, v41
54; GFX9-NEXT:    v_mov_b32_e32 v1, v42
55; GFX9-NEXT:    v_mov_b32_e32 v2, v43
56; GFX9-NEXT:    v_mov_b32_e32 v3, v44
57; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
58; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
59; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
60; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
61; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
62; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
63; GFX9-NEXT:    s_addk_i32 s32, 0xf800
64; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
65; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
66; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
67; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
68; GFX9-NEXT:    s_waitcnt vmcnt(0)
69; GFX9-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX10-LABEL: non_preserved_vgpr_tuple8:
72; GFX10:       ; %bb.0: ; %main_body
73; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
75; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
76; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
77; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
78; GFX10-NEXT:    s_mov_b32 exec_lo, s4
79; GFX10-NEXT:    v_mov_b32_e32 v36, v16
80; GFX10-NEXT:    v_mov_b32_e32 v35, v15
81; GFX10-NEXT:    v_mov_b32_e32 v34, v14
82; GFX10-NEXT:    v_mov_b32_e32 v33, v13
83; GFX10-NEXT:    v_mov_b32_e32 v32, v12
84; GFX10-NEXT:    s_mov_b32 s4, 0
85; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
86; GFX10-NEXT:    s_mov_b32 s33, s32
87; GFX10-NEXT:    s_mov_b32 s5, s4
88; GFX10-NEXT:    s_mov_b32 s6, s4
89; GFX10-NEXT:    s_mov_b32 s7, s4
90; GFX10-NEXT:    s_mov_b32 s8, s4
91; GFX10-NEXT:    s_mov_b32 s9, s4
92; GFX10-NEXT:    s_mov_b32 s10, s4
93; GFX10-NEXT:    s_mov_b32 s11, s4
94; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
95; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
96; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
97; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
98; GFX10-NEXT:    ;;#ASMSTART
99; GFX10-NEXT:    ;;#ASMEND
100; GFX10-NEXT:    ;;#ASMSTART
101; GFX10-NEXT:    ;;#ASMEND
102; GFX10-NEXT:    ;;#ASMSTART
103; GFX10-NEXT:    ;;#ASMEND
104; GFX10-NEXT:    ;;#ASMSTART
105; GFX10-NEXT:    ;;#ASMEND
106; GFX10-NEXT:    image_gather4_c_b_cl v[41:44], v[32:36], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
107; GFX10-NEXT:    s_addk_i32 s32, 0x400
108; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
109; GFX10-NEXT:    s_getpc_b64 s[4:5]
110; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
111; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
112; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
113; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
114; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
115; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
116; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
117; GFX10-NEXT:    v_mov_b32_e32 v0, v41
118; GFX10-NEXT:    v_mov_b32_e32 v1, v42
119; GFX10-NEXT:    v_mov_b32_e32 v2, v43
120; GFX10-NEXT:    v_mov_b32_e32 v3, v44
121; GFX10-NEXT:    s_clause 0x3
122; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33
123; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:4
124; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:8
125; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:12
126; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
127; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
128; GFX10-NEXT:    s_addk_i32 s32, 0xfc00
129; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
130; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
131; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
132; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
133; GFX10-NEXT:    s_mov_b32 exec_lo, s4
134; GFX10-NEXT:    s_waitcnt vmcnt(0)
135; GFX10-NEXT:    s_setpc_b64 s[30:31]
136
137
138
139
140
141
142
143
144
145
146main_body:
  ; Four empty inline-asm statements that together list v0 through v31 as
  ; clobbered. Presumably this keeps the gather operands out of v0-v31 (the
  ; assertions expect v[32:36]) -- TODO confirm against the allocator change
  ; this test was written for.
147  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
148  call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
149  call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
150  call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
  ; The gather is issued once, before the call; only its result %v is used
  ; after @extern_func returns, so the address operands are dead at the call.
151  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
152  call void @extern_func()
153  ret <4 x float> %v
154}
155
156define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
157; The vgpr tuple8 operand of the image_gather4_c_b_cl instruction needs to be preserved
158; across the call and should get allocated to 8 CSRs.
159; Only the lower 5 sub-registers of the tuple are preserved.
160; The upper 3 sub-registers are unused.
; The same gather is issued both before and after the call, so its address
; operands are live across the call. The assertions below expect them in
; v41-v45, whose previous contents are stored to the stack in the prologue
; and reloaded in the epilogue.
161; GFX9-LABEL: call_preserved_vgpr_tuple8:
162; GFX9:       ; %bb.0: ; %main_body
163; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
165; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
166; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
167; GFX9-NEXT:    v_writelane_b32 v40, s33, 10
168; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
169; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
170; GFX9-NEXT:    v_writelane_b32 v40, s36, 2
171; GFX9-NEXT:    v_writelane_b32 v40, s37, 3
172; GFX9-NEXT:    v_writelane_b32 v40, s38, 4
173; GFX9-NEXT:    v_writelane_b32 v40, s39, 5
174; GFX9-NEXT:    v_writelane_b32 v40, s40, 6
175; GFX9-NEXT:    v_writelane_b32 v40, s41, 7
176; GFX9-NEXT:    s_mov_b32 s33, s32
177; GFX9-NEXT:    v_writelane_b32 v40, s42, 8
178; GFX9-NEXT:    s_mov_b32 s36, 0
179; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
180; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
181; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
182; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
183; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
184; GFX9-NEXT:    v_writelane_b32 v40, s43, 9
185; GFX9-NEXT:    v_mov_b32_e32 v45, v16
186; GFX9-NEXT:    v_mov_b32_e32 v44, v15
187; GFX9-NEXT:    v_mov_b32_e32 v43, v14
188; GFX9-NEXT:    v_mov_b32_e32 v42, v13
189; GFX9-NEXT:    v_mov_b32_e32 v41, v12
190; GFX9-NEXT:    s_mov_b32 s37, s36
191; GFX9-NEXT:    s_mov_b32 s38, s36
192; GFX9-NEXT:    s_mov_b32 s39, s36
193; GFX9-NEXT:    s_mov_b32 s40, s36
194; GFX9-NEXT:    s_mov_b32 s41, s36
195; GFX9-NEXT:    s_mov_b32 s42, s36
196; GFX9-NEXT:    s_mov_b32 s43, s36
197; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
198; GFX9-NEXT:    s_addk_i32 s32, 0x800
199; GFX9-NEXT:    s_getpc_b64 s[4:5]
200; GFX9-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
201; GFX9-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
202; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
203; GFX9-NEXT:    s_waitcnt vmcnt(0)
204; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
205; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
206; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
207; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
208; GFX9-NEXT:    s_nop 0
209; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
210; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
211; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
212; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
213; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
214; GFX9-NEXT:    v_readlane_b32 s43, v40, 9
215; GFX9-NEXT:    v_readlane_b32 s42, v40, 8
216; GFX9-NEXT:    v_readlane_b32 s41, v40, 7
217; GFX9-NEXT:    v_readlane_b32 s40, v40, 6
218; GFX9-NEXT:    v_readlane_b32 s39, v40, 5
219; GFX9-NEXT:    v_readlane_b32 s38, v40, 4
220; GFX9-NEXT:    v_readlane_b32 s37, v40, 3
221; GFX9-NEXT:    v_readlane_b32 s36, v40, 2
222; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
223; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
224; GFX9-NEXT:    s_addk_i32 s32, 0xf800
225; GFX9-NEXT:    v_readlane_b32 s33, v40, 10
226; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
227; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
228; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
229; GFX9-NEXT:    s_waitcnt vmcnt(0)
230; GFX9-NEXT:    s_setpc_b64 s[30:31]
231;
232; GFX10-LABEL: call_preserved_vgpr_tuple8:
233; GFX10:       ; %bb.0: ; %main_body
234; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
236; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
237; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
238; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
239; GFX10-NEXT:    s_mov_b32 exec_lo, s4
240; GFX10-NEXT:    v_writelane_b32 v40, s33, 10
241; GFX10-NEXT:    s_mov_b32 s33, s32
242; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
243; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
244; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
245; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
246; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
247; GFX10-NEXT:    s_addk_i32 s32, 0x400
248; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
249; GFX10-NEXT:    v_mov_b32_e32 v41, v16
250; GFX10-NEXT:    v_mov_b32_e32 v42, v15
251; GFX10-NEXT:    v_mov_b32_e32 v43, v14
252; GFX10-NEXT:    v_mov_b32_e32 v44, v13
253; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
254; GFX10-NEXT:    v_mov_b32_e32 v45, v12
255; GFX10-NEXT:    v_writelane_b32 v40, s36, 2
256; GFX10-NEXT:    s_mov_b32 s36, 0
257; GFX10-NEXT:    v_writelane_b32 v40, s37, 3
258; GFX10-NEXT:    s_mov_b32 s37, s36
259; GFX10-NEXT:    v_writelane_b32 v40, s38, 4
260; GFX10-NEXT:    s_mov_b32 s38, s36
261; GFX10-NEXT:    v_writelane_b32 v40, s39, 5
262; GFX10-NEXT:    s_mov_b32 s39, s36
263; GFX10-NEXT:    v_writelane_b32 v40, s40, 6
264; GFX10-NEXT:    s_mov_b32 s40, s36
265; GFX10-NEXT:    v_writelane_b32 v40, s41, 7
266; GFX10-NEXT:    s_mov_b32 s41, s36
267; GFX10-NEXT:    v_writelane_b32 v40, s42, 8
268; GFX10-NEXT:    s_mov_b32 s42, s36
269; GFX10-NEXT:    v_writelane_b32 v40, s43, 9
270; GFX10-NEXT:    s_mov_b32 s43, s36
271; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
272; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
273; GFX10-NEXT:    s_getpc_b64 s[4:5]
274; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
275; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
276; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
277; GFX10-NEXT:    s_waitcnt vmcnt(0)
278; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
279; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
280; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
281; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
282; GFX10-NEXT:    s_clause 0x4
283; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33
284; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:4
285; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:8
286; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:12
287; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:16
288; GFX10-NEXT:    v_readlane_b32 s43, v40, 9
289; GFX10-NEXT:    v_readlane_b32 s42, v40, 8
290; GFX10-NEXT:    v_readlane_b32 s41, v40, 7
291; GFX10-NEXT:    v_readlane_b32 s40, v40, 6
292; GFX10-NEXT:    v_readlane_b32 s39, v40, 5
293; GFX10-NEXT:    v_readlane_b32 s38, v40, 4
294; GFX10-NEXT:    v_readlane_b32 s37, v40, 3
295; GFX10-NEXT:    v_readlane_b32 s36, v40, 2
296; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
297; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
298; GFX10-NEXT:    s_addk_i32 s32, 0xfc00
299; GFX10-NEXT:    v_readlane_b32 s33, v40, 10
300; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
301; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
302; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
303; GFX10-NEXT:    s_mov_b32 exec_lo, s4
304; GFX10-NEXT:    s_waitcnt vmcnt(0)
305; GFX10-NEXT:    s_setpc_b64 s[30:31]
306
307
308
309
310
311
312
313main_body:
  ; First gather: its result is stored to an undef global pointer before the
  ; call, so %v itself is not live across @extern_func.
314  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
315  store <4 x float> %v, <4 x float> addrspace(1)* undef
316  call void @extern_func()
  ; Second gather reuses %bias, %zcompare, %s, %t and %clamp after the call;
  ; this is what keeps the address operands live across it.
317  %v1 = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
318  ret <4 x float> %v1
319}
320
321declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
322
323attributes #0 = { nounwind writeonly }
324attributes #1 = { nounwind readonly }
325attributes #2 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
326