1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
5; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s
6
; External callees used by the tests in this file. All are hidden, use the
; amdgpu_gfx calling convention, return void, and are declarations only.
; Small integers: plain, signext and zeroext forms of i1, i8 and i16.
7declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0
8declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0
9declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0
10
11declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0
12declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0
13declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0
14
15declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0
16declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0
17declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0
18
; i32, i64 and vectors of i64.
19declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0
20declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0
21declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0
22declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0
23declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0
24
; Floating-point scalars (half, float, double) and float/double vectors.
25declare hidden amdgpu_gfx void @external_void_func_f16(half) #0
26declare hidden amdgpu_gfx void @external_void_func_f32(float) #0
27declare hidden amdgpu_gfx void @external_void_func_f64(double) #0
28declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0
29declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0
30declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) #0
31declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0
32declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0
33
; Vectors with 16-bit elements (i16 and half), 2 to 4 elements wide.
34declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0
35declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0
36declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0
37declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0
38declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0
39declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0
40
; Vectors of i32 from 2 to 32 elements; the *_i32 variants take an additional
; trailing i32 argument after the vector.
41declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0
42declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0
43declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
44declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0
45declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0
46declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0
47declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0
48declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0
49declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
50
; External callees whose every parameter carries the inreg attribute. All are
; hidden, amdgpu_gfx, void-returning declarations.
; Integer scalars and vectors of i64.
51declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0
52declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0
53declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0
54declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0
55declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0
56declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0
57declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0
58declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0
59
; Floating-point scalars and float/double vectors.
60declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) #0
61declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0
62declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0
63declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0
64declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0
65declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0
66declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0
67declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0
68
; Vectors with 16-bit elements (i16 and half).
69declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0
70declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
71declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0
72declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
73declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0
74declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
75
; Vectors of i32; the *_i32_inreg variants take an additional trailing
; i32 inreg argument after the vector.
76declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
77declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
78declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0
79declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
80declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0
81declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
82declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
83declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0
84declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0
85
; The only callee declared here that returns a value: takes an i32 and
; returns an i32.
86; return value and argument
87declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0
88
; Struct { i8, i32 } passed three ways: directly by value, through a byval
; pointer, and through an sret pointer followed by a byval pointer. The byval
; and sret pointers are in addrspace(5).
89; Structs
90declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0
91declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
92declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
93
; Vector (not struct) callee taking <16 x i8>.
94declare hidden amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0
95
; Passes the constant i1 true to @external_void_func_i1. In the checked code
; for every prefix the value is materialized as 1 in v0 and written with a
; byte store that uses s32, ahead of the s_swappc_b64 call.
96define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
97; GFX9-LABEL: test_call_external_void_func_i1_imm:
98; GFX9:       ; %bb.0:
99; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
101; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
102; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
103; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
104; GFX9-NEXT:    s_mov_b32 s33, s32
105; GFX9-NEXT:    s_addk_i32 s32, 0x400
106; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
107; GFX9-NEXT:    v_mov_b32_e32 v0, 1
108; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
109; GFX9-NEXT:    s_getpc_b64 s[34:35]
110; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
111; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
112; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s32
113; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
114; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
115; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
116; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
117; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
118; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
119; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
120; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
121; GFX9-NEXT:    s_waitcnt vmcnt(0)
122; GFX9-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX10-LABEL: test_call_external_void_func_i1_imm:
125; GFX10:       ; %bb.0:
126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
128; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
129; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
130; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
131; GFX10-NEXT:    s_mov_b32 exec_lo, s34
132; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
133; GFX10-NEXT:    v_mov_b32_e32 v0, 1
134; GFX10-NEXT:    s_mov_b32 s33, s32
135; GFX10-NEXT:    s_addk_i32 s32, 0x200
136; GFX10-NEXT:    s_getpc_b64 s[34:35]
137; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
138; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
139; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
140; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s32
141; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
142; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
143; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
144; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
145; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
146; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
147; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
148; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
149; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
150; GFX10-NEXT:    s_mov_b32 exec_lo, s34
151; GFX10-NEXT:    s_waitcnt vmcnt(0)
152; GFX10-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX11-LABEL: test_call_external_void_func_i1_imm:
155; GFX11:       ; %bb.0:
156; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
158; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
159; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
160; GFX11-NEXT:    s_mov_b32 exec_lo, s0
161; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
162; GFX11-NEXT:    v_mov_b32_e32 v0, 1
163; GFX11-NEXT:    s_mov_b32 s33, s32
164; GFX11-NEXT:    s_add_i32 s32, s32, 16
165; GFX11-NEXT:    s_getpc_b64 s[0:1]
166; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4
167; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12
168; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
169; GFX11-NEXT:    scratch_store_b8 off, v0, s32
170; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
171; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
172; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
173; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
174; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
175; GFX11-NEXT:    s_add_i32 s32, s32, -16
176; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
177; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
178; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
179; GFX11-NEXT:    s_mov_b32 exec_lo, s0
180; GFX11-NEXT:    s_waitcnt vmcnt(0)
181; GFX11-NEXT:    s_setpc_b64 s[30:31]
182;
183; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
184; GFX10-SCRATCH:       ; %bb.0:
185; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
187; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
188; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
189; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
190; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
191; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
192; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
193; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
194; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
195; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
196; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4
197; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12
198; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
199; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s32
200; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
201; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
202; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
203; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
204; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
205; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
206; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
207; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
208; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
209; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
210; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
211; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
212  call amdgpu_gfx void @external_void_func_i1(i1 true)
213  ret void
214}
215
; Loads a volatile i1 through an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_signext as a signext argument. The unnamed i32
; parameter is never referenced. In the checked code for every prefix the
; loaded byte is masked with 1 and written with a byte store that uses s32,
; ahead of the s_swappc_b64 call.
216define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
217; GFX9-LABEL: test_call_external_void_func_i1_signext:
218; GFX9:       ; %bb.0:
219; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
221; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
222; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
223; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
224; GFX9-NEXT:    s_waitcnt vmcnt(0)
225; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
226; GFX9-NEXT:    s_mov_b32 s33, s32
227; GFX9-NEXT:    s_addk_i32 s32, 0x400
228; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
229; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
230; GFX9-NEXT:    s_getpc_b64 s[34:35]
231; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
232; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
233; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
234; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s32
235; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
236; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
237; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
238; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
239; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
240; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
241; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
242; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
243; GFX9-NEXT:    s_waitcnt vmcnt(0)
244; GFX9-NEXT:    s_setpc_b64 s[30:31]
245;
246; GFX10-LABEL: test_call_external_void_func_i1_signext:
247; GFX10:       ; %bb.0:
248; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
250; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
251; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
252; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
253; GFX10-NEXT:    s_mov_b32 exec_lo, s34
254; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
255; GFX10-NEXT:    s_waitcnt vmcnt(0)
256; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
257; GFX10-NEXT:    s_mov_b32 s33, s32
258; GFX10-NEXT:    s_addk_i32 s32, 0x200
259; GFX10-NEXT:    s_getpc_b64 s[34:35]
260; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
261; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
262; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
263; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
264; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
265; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s32
266; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
267; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
268; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
269; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
270; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
271; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
272; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
273; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
274; GFX10-NEXT:    s_mov_b32 exec_lo, s34
275; GFX10-NEXT:    s_waitcnt vmcnt(0)
276; GFX10-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX11-LABEL: test_call_external_void_func_i1_signext:
279; GFX11:       ; %bb.0:
280; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
282; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
283; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
284; GFX11-NEXT:    s_mov_b32 exec_lo, s0
285; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
286; GFX11-NEXT:    s_waitcnt vmcnt(0)
287; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
288; GFX11-NEXT:    s_mov_b32 s33, s32
289; GFX11-NEXT:    s_add_i32 s32, s32, 16
290; GFX11-NEXT:    s_getpc_b64 s[0:1]
291; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4
292; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12
293; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
294; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
295; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
296; GFX11-NEXT:    scratch_store_b8 off, v0, s32
297; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
298; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
299; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
300; GFX11-NEXT:    s_add_i32 s32, s32, -16
301; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
302; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
303; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
304; GFX11-NEXT:    s_mov_b32 exec_lo, s0
305; GFX11-NEXT:    s_waitcnt vmcnt(0)
306; GFX11-NEXT:    s_setpc_b64 s[30:31]
307;
308; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
309; GFX10-SCRATCH:       ; %bb.0:
310; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
312; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
313; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
314; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
315; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
316; GFX10-SCRATCH-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
317; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
318; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
319; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
320; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
321; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
322; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4
323; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12
324; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
325; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
326; GFX10-SCRATCH-NEXT:    v_and_b32_e32 v0, 1, v0
327; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s32
328; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
329; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
330; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
331; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
332; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
333; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
334; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
335; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
336; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
337; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
338; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
339  %var = load volatile i1, i1 addrspace(1)* undef
340  call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
341  ret void
342}
343
; Loads a volatile i1 through an undef addrspace(1) pointer and passes it to
; @external_void_func_i1_zeroext as a zeroext argument. The unnamed i32
; parameter is never referenced. In the checked code for every prefix the
; loaded byte is masked with 1 and written with a byte store that uses s32,
; ahead of the s_swappc_b64 call.
344define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
345; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
346; GFX9:       ; %bb.0:
347; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
349; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
350; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
351; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
352; GFX9-NEXT:    s_waitcnt vmcnt(0)
353; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
354; GFX9-NEXT:    s_mov_b32 s33, s32
355; GFX9-NEXT:    s_addk_i32 s32, 0x400
356; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
357; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
358; GFX9-NEXT:    s_getpc_b64 s[34:35]
359; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
360; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
361; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
362; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s32
363; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
364; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
365; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
366; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
367; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
368; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
369; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
370; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
371; GFX9-NEXT:    s_waitcnt vmcnt(0)
372; GFX9-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
375; GFX10:       ; %bb.0:
376; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
378; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
379; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
380; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
381; GFX10-NEXT:    s_mov_b32 exec_lo, s34
382; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
383; GFX10-NEXT:    s_waitcnt vmcnt(0)
384; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
385; GFX10-NEXT:    s_mov_b32 s33, s32
386; GFX10-NEXT:    s_addk_i32 s32, 0x200
387; GFX10-NEXT:    s_getpc_b64 s[34:35]
388; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
389; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
390; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
391; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
392; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
393; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s32
394; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
395; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
396; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
397; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
398; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
399; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
400; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
401; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
402; GFX10-NEXT:    s_mov_b32 exec_lo, s34
403; GFX10-NEXT:    s_waitcnt vmcnt(0)
404; GFX10-NEXT:    s_setpc_b64 s[30:31]
405;
406; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
407; GFX11:       ; %bb.0:
408; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
410; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
411; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
412; GFX11-NEXT:    s_mov_b32 exec_lo, s0
413; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
414; GFX11-NEXT:    s_waitcnt vmcnt(0)
415; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
416; GFX11-NEXT:    s_mov_b32 s33, s32
417; GFX11-NEXT:    s_add_i32 s32, s32, 16
418; GFX11-NEXT:    s_getpc_b64 s[0:1]
419; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4
420; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12
421; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
422; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
423; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
424; GFX11-NEXT:    scratch_store_b8 off, v0, s32
425; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
426; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
427; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
428; GFX11-NEXT:    s_add_i32 s32, s32, -16
429; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
430; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
431; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
432; GFX11-NEXT:    s_mov_b32 exec_lo, s0
433; GFX11-NEXT:    s_waitcnt vmcnt(0)
434; GFX11-NEXT:    s_setpc_b64 s[30:31]
435;
436; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
437; GFX10-SCRATCH:       ; %bb.0:
438; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
440; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
441; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
442; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
443; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
444; GFX10-SCRATCH-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
445; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
446; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
447; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
448; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
449; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
450; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4
451; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12
452; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
453; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
454; GFX10-SCRATCH-NEXT:    v_and_b32_e32 v0, 1, v0
455; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s32
456; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
457; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
458; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
459; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
460; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
461; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
462; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
463; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
464; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
465; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
466; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
467  %var = load volatile i1, i1 addrspace(1)* undef
468  call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
469  ret void
470}
471
; Passes the constant i8 123 to @external_void_func_i8. The unnamed i32
; parameter is never referenced. In the checked code for every prefix the
; constant is moved into v0 as 0x7b, and no store is issued between that move
; and the s_swappc_b64 call.
472define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
473; GFX9-LABEL: test_call_external_void_func_i8_imm:
474; GFX9:       ; %bb.0:
475; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
477; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
478; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
479; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
480; GFX9-NEXT:    s_mov_b32 s33, s32
481; GFX9-NEXT:    s_addk_i32 s32, 0x400
482; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
483; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
484; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
485; GFX9-NEXT:    s_getpc_b64 s[34:35]
486; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
487; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
488; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
489; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
490; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
491; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
492; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
493; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
494; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
495; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
496; GFX9-NEXT:    s_waitcnt vmcnt(0)
497; GFX9-NEXT:    s_setpc_b64 s[30:31]
498;
499; GFX10-LABEL: test_call_external_void_func_i8_imm:
500; GFX10:       ; %bb.0:
501; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
503; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
504; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
505; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
506; GFX10-NEXT:    s_mov_b32 exec_lo, s34
507; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
508; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7b
509; GFX10-NEXT:    s_mov_b32 s33, s32
510; GFX10-NEXT:    s_addk_i32 s32, 0x200
511; GFX10-NEXT:    s_getpc_b64 s[34:35]
512; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
513; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
514; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
515; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
516; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
517; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
518; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
519; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
520; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
521; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
522; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
523; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
524; GFX10-NEXT:    s_mov_b32 exec_lo, s34
525; GFX10-NEXT:    s_waitcnt vmcnt(0)
526; GFX10-NEXT:    s_setpc_b64 s[30:31]
527;
528; GFX11-LABEL: test_call_external_void_func_i8_imm:
529; GFX11:       ; %bb.0:
530; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
532; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
533; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
534; GFX11-NEXT:    s_mov_b32 exec_lo, s0
535; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
536; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7b
537; GFX11-NEXT:    s_mov_b32 s33, s32
538; GFX11-NEXT:    s_add_i32 s32, s32, 16
539; GFX11-NEXT:    s_getpc_b64 s[0:1]
540; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4
541; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12
542; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
543; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
544; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
545; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
546; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
547; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
548; GFX11-NEXT:    s_add_i32 s32, s32, -16
549; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
550; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
551; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
552; GFX11-NEXT:    s_mov_b32 exec_lo, s0
553; GFX11-NEXT:    s_waitcnt vmcnt(0)
554; GFX11-NEXT:    s_setpc_b64 s[30:31]
555;
556; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
557; GFX10-SCRATCH:       ; %bb.0:
558; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
560; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
561; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
562; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
563; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
564; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
565; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x7b
566; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
567; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
568; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
569; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4
570; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12
571; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
572; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
573; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
574; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
575; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
576; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
577; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
578; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
579; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
580; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
581; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
582; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
583; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
584  call amdgpu_gfx void @external_void_func_i8(i8 123)
585  ret void
586}
587
; Passes an i8, loaded (volatile) from an undef addrspace(1) pointer, to
; @external_void_func_i8_signext with the `signext` attribute. The unnamed i32
; parameter of this function is not referenced by the body. Every check prefix
; below expects a signed byte load into v0 (global_load_sbyte, spelled
; global_load_i8 under the gfx1100 run) ahead of the s_swappc_b64 call.
588define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
589; GFX9-LABEL: test_call_external_void_func_i8_signext:
590; GFX9:       ; %bb.0:
591; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
593; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
594; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
595; GFX9-NEXT:    global_load_sbyte v0, v[0:1], off glc
596; GFX9-NEXT:    s_waitcnt vmcnt(0)
597; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
598; GFX9-NEXT:    s_mov_b32 s33, s32
599; GFX9-NEXT:    s_addk_i32 s32, 0x400
600; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
601; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
602; GFX9-NEXT:    s_getpc_b64 s[34:35]
603; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
604; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
605; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
606; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
607; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
608; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
609; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
610; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
611; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
612; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
613; GFX9-NEXT:    s_waitcnt vmcnt(0)
614; GFX9-NEXT:    s_setpc_b64 s[30:31]
615;
616; GFX10-LABEL: test_call_external_void_func_i8_signext:
617; GFX10:       ; %bb.0:
618; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
620; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
621; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
622; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
623; GFX10-NEXT:    s_mov_b32 exec_lo, s34
624; GFX10-NEXT:    global_load_sbyte v0, v[0:1], off glc dlc
625; GFX10-NEXT:    s_waitcnt vmcnt(0)
626; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
627; GFX10-NEXT:    s_mov_b32 s33, s32
628; GFX10-NEXT:    s_addk_i32 s32, 0x200
629; GFX10-NEXT:    s_getpc_b64 s[34:35]
630; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
631; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
632; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
633; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
634; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
635; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
636; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
637; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
638; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
639; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
640; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
641; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
642; GFX10-NEXT:    s_mov_b32 exec_lo, s34
643; GFX10-NEXT:    s_waitcnt vmcnt(0)
644; GFX10-NEXT:    s_setpc_b64 s[30:31]
645;
646; GFX11-LABEL: test_call_external_void_func_i8_signext:
647; GFX11:       ; %bb.0:
648; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
650; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
651; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
652; GFX11-NEXT:    s_mov_b32 exec_lo, s0
653; GFX11-NEXT:    global_load_i8 v0, v[0:1], off glc dlc
654; GFX11-NEXT:    s_waitcnt vmcnt(0)
655; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
656; GFX11-NEXT:    s_mov_b32 s33, s32
657; GFX11-NEXT:    s_add_i32 s32, s32, 16
658; GFX11-NEXT:    s_getpc_b64 s[0:1]
659; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4
660; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12
661; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
662; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
663; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
664; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
665; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
666; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
667; GFX11-NEXT:    s_add_i32 s32, s32, -16
668; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
669; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
670; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
671; GFX11-NEXT:    s_mov_b32 exec_lo, s0
672; GFX11-NEXT:    s_waitcnt vmcnt(0)
673; GFX11-NEXT:    s_setpc_b64 s[30:31]
674;
675; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
676; GFX10-SCRATCH:       ; %bb.0:
677; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
679; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
680; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
681; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
682; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
683; GFX10-SCRATCH-NEXT:    global_load_sbyte v0, v[0:1], off glc dlc
684; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
685; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
686; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
687; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
688; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
689; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4
690; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12
691; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
692; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
693; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
694; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
695; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
696; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
697; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
698; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
699; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
700; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
701; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
702; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
703; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): the load is volatile, presumably so it is kept even though
  ; the pointer operand is undef -- confirm.
704  %var = load volatile i8, i8 addrspace(1)* undef
705  call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
706  ret void
707}
708
; Passes an i8, loaded (volatile) from an undef addrspace(1) pointer, to
; @external_void_func_i8_zeroext with the `zeroext` attribute. The unnamed i32
; parameter of this function is not referenced by the body. Every check prefix
; below expects an unsigned byte load into v0 (global_load_ubyte, spelled
; global_load_u8 under the gfx1100 run) ahead of the s_swappc_b64 call.
709define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
710; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
711; GFX9:       ; %bb.0:
712; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
714; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
715; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
716; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
717; GFX9-NEXT:    s_waitcnt vmcnt(0)
718; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
719; GFX9-NEXT:    s_mov_b32 s33, s32
720; GFX9-NEXT:    s_addk_i32 s32, 0x400
721; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
722; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
723; GFX9-NEXT:    s_getpc_b64 s[34:35]
724; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
725; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
726; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
727; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
728; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
729; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
730; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
731; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
732; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
733; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
734; GFX9-NEXT:    s_waitcnt vmcnt(0)
735; GFX9-NEXT:    s_setpc_b64 s[30:31]
736;
737; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
738; GFX10:       ; %bb.0:
739; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
741; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
742; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
743; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
744; GFX10-NEXT:    s_mov_b32 exec_lo, s34
745; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
746; GFX10-NEXT:    s_waitcnt vmcnt(0)
747; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
748; GFX10-NEXT:    s_mov_b32 s33, s32
749; GFX10-NEXT:    s_addk_i32 s32, 0x200
750; GFX10-NEXT:    s_getpc_b64 s[34:35]
751; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
752; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
753; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
754; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
755; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
756; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
757; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
758; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
759; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
760; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
761; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
762; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
763; GFX10-NEXT:    s_mov_b32 exec_lo, s34
764; GFX10-NEXT:    s_waitcnt vmcnt(0)
765; GFX10-NEXT:    s_setpc_b64 s[30:31]
766;
767; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
768; GFX11:       ; %bb.0:
769; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
770; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
771; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
772; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
773; GFX11-NEXT:    s_mov_b32 exec_lo, s0
774; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
775; GFX11-NEXT:    s_waitcnt vmcnt(0)
776; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
777; GFX11-NEXT:    s_mov_b32 s33, s32
778; GFX11-NEXT:    s_add_i32 s32, s32, 16
779; GFX11-NEXT:    s_getpc_b64 s[0:1]
780; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4
781; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12
782; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
783; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
784; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
785; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
786; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
787; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
788; GFX11-NEXT:    s_add_i32 s32, s32, -16
789; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
790; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
791; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
792; GFX11-NEXT:    s_mov_b32 exec_lo, s0
793; GFX11-NEXT:    s_waitcnt vmcnt(0)
794; GFX11-NEXT:    s_setpc_b64 s[30:31]
795;
796; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
797; GFX10-SCRATCH:       ; %bb.0:
798; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
800; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
801; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
802; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
803; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
804; GFX10-SCRATCH-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
805; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
806; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
807; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
808; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
809; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
810; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4
811; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12
812; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
813; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
814; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
815; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
816; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
817; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
818; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
819; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
820; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
821; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
822; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
823; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
824; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): the load is volatile, presumably so it is kept even though
  ; the pointer operand is undef -- confirm.
825  %var = load volatile i8, i8 addrspace(1)* undef
826  call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
827  ret void
828}
829
; Calls @external_void_func_i16 with the constant i16 123. Every check prefix
; below expects the constant to be materialized into v0 with
; `v_mov_b32_e32 v0, 0x7b` (0x7b == 123) before the s_swappc_b64 call, with
; s30, s31 and s33 saved in lanes 0-2 of v40 around the call.
830define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
831; GFX9-LABEL: test_call_external_void_func_i16_imm:
832; GFX9:       ; %bb.0:
833; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
835; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
836; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
837; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
838; GFX9-NEXT:    s_mov_b32 s33, s32
839; GFX9-NEXT:    s_addk_i32 s32, 0x400
840; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
841; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
842; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
843; GFX9-NEXT:    s_getpc_b64 s[34:35]
844; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
845; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
846; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
847; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
848; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
849; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
850; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
851; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
852; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
853; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
854; GFX9-NEXT:    s_waitcnt vmcnt(0)
855; GFX9-NEXT:    s_setpc_b64 s[30:31]
856;
857; GFX10-LABEL: test_call_external_void_func_i16_imm:
858; GFX10:       ; %bb.0:
859; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
861; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
862; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
863; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
864; GFX10-NEXT:    s_mov_b32 exec_lo, s34
865; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
866; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7b
867; GFX10-NEXT:    s_mov_b32 s33, s32
868; GFX10-NEXT:    s_addk_i32 s32, 0x200
869; GFX10-NEXT:    s_getpc_b64 s[34:35]
870; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
871; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
872; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
873; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
874; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
875; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
876; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
877; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
878; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
879; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
880; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
881; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
882; GFX10-NEXT:    s_mov_b32 exec_lo, s34
883; GFX10-NEXT:    s_waitcnt vmcnt(0)
884; GFX10-NEXT:    s_setpc_b64 s[30:31]
885;
886; GFX11-LABEL: test_call_external_void_func_i16_imm:
887; GFX11:       ; %bb.0:
888; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
890; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
891; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
892; GFX11-NEXT:    s_mov_b32 exec_lo, s0
893; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
894; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7b
895; GFX11-NEXT:    s_mov_b32 s33, s32
896; GFX11-NEXT:    s_add_i32 s32, s32, 16
897; GFX11-NEXT:    s_getpc_b64 s[0:1]
898; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4
899; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12
900; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
901; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
902; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
903; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
904; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
905; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
906; GFX11-NEXT:    s_add_i32 s32, s32, -16
907; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
908; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
909; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
910; GFX11-NEXT:    s_mov_b32 exec_lo, s0
911; GFX11-NEXT:    s_waitcnt vmcnt(0)
912; GFX11-NEXT:    s_setpc_b64 s[30:31]
913;
914; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
915; GFX10-SCRATCH:       ; %bb.0:
916; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
918; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
919; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
920; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
921; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
922; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
923; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x7b
924; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
925; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
926; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
927; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4
928; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12
929; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
930; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
931; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
932; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
933; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
934; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
935; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
936; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
937; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
938; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
939; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
940; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
941; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
942  call amdgpu_gfx void @external_void_func_i16(i16 123)
943  ret void
944}
945
; Passes an i16, loaded (volatile) from an undef addrspace(1) pointer, to
; @external_void_func_i16_signext with the `signext` attribute. The unnamed
; i32 parameter of this function is not referenced by the body.
; Note that every check prefix below expects global_load_ushort (spelled
; global_load_u16 under the gfx1100 run) into v0, and no separate
; sign-extension instruction appears between the load and the s_swappc_b64
; call.
946define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
947; GFX9-LABEL: test_call_external_void_func_i16_signext:
948; GFX9:       ; %bb.0:
949; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
951; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
952; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
953; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
954; GFX9-NEXT:    s_waitcnt vmcnt(0)
955; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
956; GFX9-NEXT:    s_mov_b32 s33, s32
957; GFX9-NEXT:    s_addk_i32 s32, 0x400
958; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
959; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
960; GFX9-NEXT:    s_getpc_b64 s[34:35]
961; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
962; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
963; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
964; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
965; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
966; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
967; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
968; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
969; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
970; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
971; GFX9-NEXT:    s_waitcnt vmcnt(0)
972; GFX9-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX10-LABEL: test_call_external_void_func_i16_signext:
975; GFX10:       ; %bb.0:
976; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
978; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
979; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
980; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
981; GFX10-NEXT:    s_mov_b32 exec_lo, s34
982; GFX10-NEXT:    global_load_ushort v0, v[0:1], off glc dlc
983; GFX10-NEXT:    s_waitcnt vmcnt(0)
984; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
985; GFX10-NEXT:    s_mov_b32 s33, s32
986; GFX10-NEXT:    s_addk_i32 s32, 0x200
987; GFX10-NEXT:    s_getpc_b64 s[34:35]
988; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
989; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
990; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
991; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
992; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
993; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
994; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
995; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
996; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
997; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
998; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
999; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1000; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1001; GFX10-NEXT:    s_waitcnt vmcnt(0)
1002; GFX10-NEXT:    s_setpc_b64 s[30:31]
1003;
1004; GFX11-LABEL: test_call_external_void_func_i16_signext:
1005; GFX11:       ; %bb.0:
1006; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1008; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1009; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1010; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1011; GFX11-NEXT:    global_load_u16 v0, v[0:1], off glc dlc
1012; GFX11-NEXT:    s_waitcnt vmcnt(0)
1013; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1014; GFX11-NEXT:    s_mov_b32 s33, s32
1015; GFX11-NEXT:    s_add_i32 s32, s32, 16
1016; GFX11-NEXT:    s_getpc_b64 s[0:1]
1017; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4
1018; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12
1019; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1020; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1021; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1022; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1023; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1024; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1025; GFX11-NEXT:    s_add_i32 s32, s32, -16
1026; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1027; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1028; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1029; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1030; GFX11-NEXT:    s_waitcnt vmcnt(0)
1031; GFX11-NEXT:    s_setpc_b64 s[30:31]
1032;
1033; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
1034; GFX10-SCRATCH:       ; %bb.0:
1035; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1037; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1038; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1039; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1040; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1041; GFX10-SCRATCH-NEXT:    global_load_ushort v0, v[0:1], off glc dlc
1042; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1043; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1044; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1045; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1046; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1047; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4
1048; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12
1049; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1050; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1051; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1052; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1053; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1054; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1055; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1056; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1057; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1058; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1059; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1060; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1061; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): the load is volatile, presumably so it is kept even though
  ; the pointer operand is undef -- confirm.
1062  %var = load volatile i16, i16 addrspace(1)* undef
1063  call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
1064  ret void
1065}
1066
; Passes an i16, loaded (volatile) from an undef addrspace(1) pointer, to
; @external_void_func_i16_zeroext with the `zeroext` attribute. The unnamed
; i32 parameter of this function is not referenced by the body. Every check
; prefix below expects global_load_ushort (spelled global_load_u16 under the
; gfx1100 run) into v0 ahead of the s_swappc_b64 call.
1067define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
1068; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
1069; GFX9:       ; %bb.0:
1070; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1072; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1073; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1074; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
1075; GFX9-NEXT:    s_waitcnt vmcnt(0)
1076; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1077; GFX9-NEXT:    s_mov_b32 s33, s32
1078; GFX9-NEXT:    s_addk_i32 s32, 0x400
1079; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1080; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1081; GFX9-NEXT:    s_getpc_b64 s[34:35]
1082; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
1083; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
1084; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1085; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1086; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1087; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1088; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1089; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1090; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1091; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1092; GFX9-NEXT:    s_waitcnt vmcnt(0)
1093; GFX9-NEXT:    s_setpc_b64 s[30:31]
1094;
1095; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
1096; GFX10:       ; %bb.0:
1097; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1098; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1099; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1100; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1101; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1102; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1103; GFX10-NEXT:    global_load_ushort v0, v[0:1], off glc dlc
1104; GFX10-NEXT:    s_waitcnt vmcnt(0)
1105; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1106; GFX10-NEXT:    s_mov_b32 s33, s32
1107; GFX10-NEXT:    s_addk_i32 s32, 0x200
1108; GFX10-NEXT:    s_getpc_b64 s[34:35]
1109; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
1110; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
1111; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1112; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1113; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1114; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1115; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1116; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1117; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1118; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1119; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1120; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1121; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1122; GFX10-NEXT:    s_waitcnt vmcnt(0)
1123; GFX10-NEXT:    s_setpc_b64 s[30:31]
1124;
1125; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
1126; GFX11:       ; %bb.0:
1127; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1129; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1130; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1131; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1132; GFX11-NEXT:    global_load_u16 v0, v[0:1], off glc dlc
1133; GFX11-NEXT:    s_waitcnt vmcnt(0)
1134; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1135; GFX11-NEXT:    s_mov_b32 s33, s32
1136; GFX11-NEXT:    s_add_i32 s32, s32, 16
1137; GFX11-NEXT:    s_getpc_b64 s[0:1]
1138; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4
1139; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12
1140; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1141; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1142; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1143; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1144; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1145; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1146; GFX11-NEXT:    s_add_i32 s32, s32, -16
1147; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1148; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1149; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1150; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1151; GFX11-NEXT:    s_waitcnt vmcnt(0)
1152; GFX11-NEXT:    s_setpc_b64 s[30:31]
1153;
1154; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
1155; GFX10-SCRATCH:       ; %bb.0:
1156; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1158; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1159; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1160; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1161; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1162; GFX10-SCRATCH-NEXT:    global_load_ushort v0, v[0:1], off glc dlc
1163; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1164; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1165; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1166; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1167; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1168; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4
1169; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12
1170; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1171; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1172; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1173; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1174; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1175; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1176; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1177; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1178; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1179; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1180; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1181; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1182; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): the load is volatile, presumably so it is kept even though
  ; the pointer operand is undef -- confirm.
1183  %var = load volatile i16, i16 addrspace(1)* undef
1184  call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
1185  ret void
1186}
1187
1188define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
1189; GFX9-LABEL: test_call_external_void_func_i32_imm:
1190; GFX9:       ; %bb.0:
1191; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1193; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1194; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1195; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1196; GFX9-NEXT:    s_mov_b32 s33, s32
1197; GFX9-NEXT:    s_addk_i32 s32, 0x400
1198; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1199; GFX9-NEXT:    v_mov_b32_e32 v0, 42
1200; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1201; GFX9-NEXT:    s_getpc_b64 s[34:35]
1202; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
1203; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
1204; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1205; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1206; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1207; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1208; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1209; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1210; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1211; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1212; GFX9-NEXT:    s_waitcnt vmcnt(0)
1213; GFX9-NEXT:    s_setpc_b64 s[30:31]
1214;
1215; GFX10-LABEL: test_call_external_void_func_i32_imm:
1216; GFX10:       ; %bb.0:
1217; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1219; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1220; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1221; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1222; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1223; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1224; GFX10-NEXT:    v_mov_b32_e32 v0, 42
1225; GFX10-NEXT:    s_mov_b32 s33, s32
1226; GFX10-NEXT:    s_addk_i32 s32, 0x200
1227; GFX10-NEXT:    s_getpc_b64 s[34:35]
1228; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
1229; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
1230; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1231; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1232; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1233; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1234; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1235; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1236; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1237; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1238; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1239; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1240; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1241; GFX10-NEXT:    s_waitcnt vmcnt(0)
1242; GFX10-NEXT:    s_setpc_b64 s[30:31]
1243;
1244; GFX11-LABEL: test_call_external_void_func_i32_imm:
1245; GFX11:       ; %bb.0:
1246; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1248; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1249; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1250; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1251; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1252; GFX11-NEXT:    v_mov_b32_e32 v0, 42
1253; GFX11-NEXT:    s_mov_b32 s33, s32
1254; GFX11-NEXT:    s_add_i32 s32, s32, 16
1255; GFX11-NEXT:    s_getpc_b64 s[0:1]
1256; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4
1257; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12
1258; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1259; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1260; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1261; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1262; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1263; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1264; GFX11-NEXT:    s_add_i32 s32, s32, -16
1265; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1266; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1267; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1268; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1269; GFX11-NEXT:    s_waitcnt vmcnt(0)
1270; GFX11-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
1273; GFX10-SCRATCH:       ; %bb.0:
1274; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1276; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1277; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1278; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1279; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1280; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1281; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 42
1282; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1283; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1284; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1285; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4
1286; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12
1287; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1288; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1289; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1290; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1291; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1292; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1293; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1294; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1295; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1296; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1297; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1298; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1299; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
1300  call amdgpu_gfx void @external_void_func_i32(i32 42)
1301  ret void
1302}
1303
; Passes the 64-bit immediate 123 to external_void_func_i64 under the
; amdgpu_gfx calling convention. The expected output places the value in the
; register pair v0/v1 as v0 = 0x7b and v1 = 0 (emitted as one dual move in the
; gfx1100 run) before the s_swappc_b64 call.
1304define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
1305; GFX9-LABEL: test_call_external_void_func_i64_imm:
1306; GFX9:       ; %bb.0:
1307; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1309; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1310; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1311; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1312; GFX9-NEXT:    s_mov_b32 s33, s32
1313; GFX9-NEXT:    s_addk_i32 s32, 0x400
1314; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1315; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
1316; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1317; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1318; GFX9-NEXT:    s_getpc_b64 s[34:35]
1319; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
1320; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
1321; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1322; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1323; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1324; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1325; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1326; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1327; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1328; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1329; GFX9-NEXT:    s_waitcnt vmcnt(0)
1330; GFX9-NEXT:    s_setpc_b64 s[30:31]
1331;
1332; GFX10-LABEL: test_call_external_void_func_i64_imm:
1333; GFX10:       ; %bb.0:
1334; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1336; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1337; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1338; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1339; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1340; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1341; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7b
1342; GFX10-NEXT:    v_mov_b32_e32 v1, 0
1343; GFX10-NEXT:    s_mov_b32 s33, s32
1344; GFX10-NEXT:    s_addk_i32 s32, 0x200
1345; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1346; GFX10-NEXT:    s_getpc_b64 s[34:35]
1347; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
1348; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
1349; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1350; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1351; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1352; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1353; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1354; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1355; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1356; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1357; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1358; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1359; GFX10-NEXT:    s_waitcnt vmcnt(0)
1360; GFX10-NEXT:    s_setpc_b64 s[30:31]
1361;
1362; GFX11-LABEL: test_call_external_void_func_i64_imm:
1363; GFX11:       ; %bb.0:
1364; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1366; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1367; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1368; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1369; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1370; GFX11-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1371; GFX11-NEXT:    s_mov_b32 s33, s32
1372; GFX11-NEXT:    s_add_i32 s32, s32, 16
1373; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1374; GFX11-NEXT:    s_getpc_b64 s[0:1]
1375; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4
1376; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
1377; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1378; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1379; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1380; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1381; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1382; GFX11-NEXT:    s_add_i32 s32, s32, -16
1383; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1384; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1385; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1386; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1387; GFX11-NEXT:    s_waitcnt vmcnt(0)
1388; GFX11-NEXT:    s_setpc_b64 s[30:31]
1389;
1390; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
1391; GFX10-SCRATCH:       ; %bb.0:
1392; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1393; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1394; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1395; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1396; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1397; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1398; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1399; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x7b
1400; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
1401; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1402; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1403; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1404; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1405; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4
1406; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
1407; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1408; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1409; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1410; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1411; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1412; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1413; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1414; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1415; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1416; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1417; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1418; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; 123 == 0x7b, which is the low dword the assertions above expect in v0.
1419  call amdgpu_gfx void @external_void_func_i64(i64 123)
1420  ret void
1421}
1422
; Loads a <2 x i64> from a null addrspace(1) pointer and passes it to
; external_void_func_v2i64 under the amdgpu_gfx calling convention. The
; expected output zeroes v0/v1 to form the address and performs one 128-bit
; global load into v[0:3] before the s_swappc_b64 call.
1423define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
1424; GFX9-LABEL: test_call_external_void_func_v2i64:
1425; GFX9:       ; %bb.0:
1426; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1427; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1428; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1429; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1430; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1431; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1432; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1433; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1434; GFX9-NEXT:    s_mov_b32 s33, s32
1435; GFX9-NEXT:    s_addk_i32 s32, 0x400
1436; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1437; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1438; GFX9-NEXT:    s_getpc_b64 s[34:35]
1439; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1440; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1441; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1442; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1443; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1444; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1445; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1446; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1447; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1448; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1449; GFX9-NEXT:    s_waitcnt vmcnt(0)
1450; GFX9-NEXT:    s_setpc_b64 s[30:31]
1451;
1452; GFX10-LABEL: test_call_external_void_func_v2i64:
1453; GFX10:       ; %bb.0:
1454; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1456; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1457; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1458; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1459; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1460; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1461; GFX10-NEXT:    v_mov_b32_e32 v1, 0
1462; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1463; GFX10-NEXT:    s_mov_b32 s33, s32
1464; GFX10-NEXT:    s_addk_i32 s32, 0x200
1465; GFX10-NEXT:    s_getpc_b64 s[34:35]
1466; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1467; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1468; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1469; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1470; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1471; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1472; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1473; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1474; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1475; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1476; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1477; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1478; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1479; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1480; GFX10-NEXT:    s_waitcnt vmcnt(0)
1481; GFX10-NEXT:    s_setpc_b64 s[30:31]
1482;
1483; GFX11-LABEL: test_call_external_void_func_v2i64:
1484; GFX11:       ; %bb.0:
1485; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1486; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1487; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1488; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1489; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1490; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1491; GFX11-NEXT:    v_mov_b32_e32 v1, 0
1492; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1493; GFX11-NEXT:    s_mov_b32 s33, s32
1494; GFX11-NEXT:    s_add_i32 s32, s32, 16
1495; GFX11-NEXT:    s_getpc_b64 s[0:1]
1496; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1497; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1498; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
1499; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1500; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1501; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1502; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1503; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1504; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1505; GFX11-NEXT:    s_add_i32 s32, s32, -16
1506; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1507; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1508; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1509; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1510; GFX11-NEXT:    s_waitcnt vmcnt(0)
1511; GFX11-NEXT:    s_setpc_b64 s[30:31]
1512;
1513; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
1514; GFX10-SCRATCH:       ; %bb.0:
1515; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1517; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1518; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1519; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1520; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1521; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
1522; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
1523; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1524; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1525; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1526; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1527; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1528; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1529; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1530; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1531; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1532; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1533; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1534; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1535; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1536; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1537; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1538; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1539; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1540; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1541; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1542; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The null pointer is the source of the all-zero v[0:1] address in the
  ; assertions above; the loaded value is forwarded unchanged to the callee.
1543  %val = load <2 x i64>, <2 x i64> addrspace(1)* null
1544  call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val)
1545  ret void
1546}
1547
; Passes a constant <2 x i64> to external_void_func_v2i64 under the amdgpu_gfx
; calling convention. The expected output materializes the vector as four
; 32-bit immediates, v0..v3 = 1, 2, 3, 4 (two dual moves in the gfx1100 run),
; with no memory load before the s_swappc_b64 call.
1548define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
1549; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1550; GFX9:       ; %bb.0:
1551; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1552; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1553; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1554; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1555; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1556; GFX9-NEXT:    s_mov_b32 s33, s32
1557; GFX9-NEXT:    s_addk_i32 s32, 0x400
1558; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1559; GFX9-NEXT:    v_mov_b32_e32 v0, 1
1560; GFX9-NEXT:    v_mov_b32_e32 v1, 2
1561; GFX9-NEXT:    v_mov_b32_e32 v2, 3
1562; GFX9-NEXT:    v_mov_b32_e32 v3, 4
1563; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1564; GFX9-NEXT:    s_getpc_b64 s[34:35]
1565; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1566; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1567; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1568; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1569; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1570; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1571; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1572; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1573; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1574; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1575; GFX9-NEXT:    s_waitcnt vmcnt(0)
1576; GFX9-NEXT:    s_setpc_b64 s[30:31]
1577;
1578; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
1579; GFX10:       ; %bb.0:
1580; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1582; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1583; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1584; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1585; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1586; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1587; GFX10-NEXT:    v_mov_b32_e32 v0, 1
1588; GFX10-NEXT:    v_mov_b32_e32 v1, 2
1589; GFX10-NEXT:    v_mov_b32_e32 v2, 3
1590; GFX10-NEXT:    v_mov_b32_e32 v3, 4
1591; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1592; GFX10-NEXT:    s_mov_b32 s33, s32
1593; GFX10-NEXT:    s_addk_i32 s32, 0x200
1594; GFX10-NEXT:    s_getpc_b64 s[34:35]
1595; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
1596; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
1597; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1598; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1599; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1600; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1601; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1602; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1603; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1604; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1605; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1606; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1607; GFX10-NEXT:    s_waitcnt vmcnt(0)
1608; GFX10-NEXT:    s_setpc_b64 s[30:31]
1609;
1610; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1611; GFX11:       ; %bb.0:
1612; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1614; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1615; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1616; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1617; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1618; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1619; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1620; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1621; GFX11-NEXT:    s_mov_b32 s33, s32
1622; GFX11-NEXT:    s_add_i32 s32, s32, 16
1623; GFX11-NEXT:    s_getpc_b64 s[0:1]
1624; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1625; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1626; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1627; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1628; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1629; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1630; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1631; GFX11-NEXT:    s_add_i32 s32, s32, -16
1632; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1633; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1634; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1635; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1636; GFX11-NEXT:    s_waitcnt vmcnt(0)
1637; GFX11-NEXT:    s_setpc_b64 s[30:31]
1638;
1639; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
1640; GFX10-SCRATCH:       ; %bb.0:
1641; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1642; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1643; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1644; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1645; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1646; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1647; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1648; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
1649; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2
1650; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 3
1651; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 4
1652; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1653; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1654; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1655; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1656; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4
1657; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
1658; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1659; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1660; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1661; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1662; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1663; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1664; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1665; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1666; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1667; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1668; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1669; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; 8589934593 == 0x2_00000001 (low dword 1, high dword 2) and
  ; 17179869187 == 0x4_00000003 (low dword 3, high dword 4), which yields the
  ; dword sequence 1, 2, 3, 4 expected in v0..v3 above.
1670  call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
1671  ret void
1672}
1673
; Builds a <3 x i64> from a loaded <2 x i64> plus one constant element and
; passes it to external_void_func_v3i64 under the amdgpu_gfx calling
; convention. The expected output fills v[0:3] with one 128-bit global load
; from a zero address and materializes the third element as v4 = 1, v5 = 2
; before the s_swappc_b64 call.
1674define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
1675; GFX9-LABEL: test_call_external_void_func_v3i64:
1676; GFX9:       ; %bb.0:
1677; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1678; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1679; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1680; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1681; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1682; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1683; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1684; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1685; GFX9-NEXT:    s_mov_b32 s33, s32
1686; GFX9-NEXT:    s_addk_i32 s32, 0x400
1687; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1688; GFX9-NEXT:    v_mov_b32_e32 v4, 1
1689; GFX9-NEXT:    v_mov_b32_e32 v5, 2
1690; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1691; GFX9-NEXT:    s_getpc_b64 s[34:35]
1692; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
1693; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
1694; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1695; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1696; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1697; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1698; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1699; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1700; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1701; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1702; GFX9-NEXT:    s_waitcnt vmcnt(0)
1703; GFX9-NEXT:    s_setpc_b64 s[30:31]
1704;
1705; GFX10-LABEL: test_call_external_void_func_v3i64:
1706; GFX10:       ; %bb.0:
1707; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1708; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1709; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1710; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1711; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1712; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1713; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1714; GFX10-NEXT:    v_mov_b32_e32 v1, 0
1715; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1716; GFX10-NEXT:    v_mov_b32_e32 v4, 1
1717; GFX10-NEXT:    v_mov_b32_e32 v5, 2
1718; GFX10-NEXT:    s_mov_b32 s33, s32
1719; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1720; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1721; GFX10-NEXT:    s_addk_i32 s32, 0x200
1722; GFX10-NEXT:    s_getpc_b64 s[34:35]
1723; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
1724; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
1725; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1726; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1727; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1728; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1729; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1730; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1731; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1732; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1733; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1734; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1735; GFX10-NEXT:    s_waitcnt vmcnt(0)
1736; GFX10-NEXT:    s_setpc_b64 s[30:31]
1737;
1738; GFX11-LABEL: test_call_external_void_func_v3i64:
1739; GFX11:       ; %bb.0:
1740; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1742; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1743; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1744; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1745; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1746; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1747; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1748; GFX11-NEXT:    s_mov_b32 s33, s32
1749; GFX11-NEXT:    s_add_i32 s32, s32, 16
1750; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
1751; GFX11-NEXT:    s_getpc_b64 s[0:1]
1752; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4
1753; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
1754; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1755; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1756; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1757; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1758; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1759; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1760; GFX11-NEXT:    s_add_i32 s32, s32, -16
1761; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1762; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1763; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1764; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1765; GFX11-NEXT:    s_waitcnt vmcnt(0)
1766; GFX11-NEXT:    s_setpc_b64 s[30:31]
1767;
1768; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
1769; GFX10-SCRATCH:       ; %bb.0:
1770; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1771; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1772; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1773; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1774; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1775; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1776; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
1777; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
1778; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1779; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 1
1780; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 2
1781; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1782; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1783; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1784; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1785; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1786; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4
1787; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
1788; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1789; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1790; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1791; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1792; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1793; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1794; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1795; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1796; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1797; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1798; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1799; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
1800  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
  ; Mask <0, 1, 2> keeps both loaded elements and appends element 0 of the
  ; constant operand, 8589934593 == 0x2_00000001 (low dword 1, high dword 2).
  ; The undef second constant element is never selected by the mask.
1801  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1802
1803  call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
1804  ret void
1805}
1806
1807define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
1808; GFX9-LABEL: test_call_external_void_func_v4i64:
1809; GFX9:       ; %bb.0:
1810; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1812; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1813; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1814; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1815; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1816; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1817; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1818; GFX9-NEXT:    s_mov_b32 s33, s32
1819; GFX9-NEXT:    s_addk_i32 s32, 0x400
1820; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1821; GFX9-NEXT:    v_mov_b32_e32 v4, 1
1822; GFX9-NEXT:    v_mov_b32_e32 v5, 2
1823; GFX9-NEXT:    v_mov_b32_e32 v6, 3
1824; GFX9-NEXT:    v_mov_b32_e32 v7, 4
1825; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1826; GFX9-NEXT:    s_getpc_b64 s[34:35]
1827; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
1828; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
1829; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1830; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1831; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1832; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1833; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1834; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1835; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1836; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1837; GFX9-NEXT:    s_waitcnt vmcnt(0)
1838; GFX9-NEXT:    s_setpc_b64 s[30:31]
1839;
1840; GFX10-LABEL: test_call_external_void_func_v4i64:
1841; GFX10:       ; %bb.0:
1842; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1843; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1844; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1845; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1846; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1847; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1848; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1849; GFX10-NEXT:    v_mov_b32_e32 v1, 0
1850; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1851; GFX10-NEXT:    v_mov_b32_e32 v4, 1
1852; GFX10-NEXT:    v_mov_b32_e32 v5, 2
1853; GFX10-NEXT:    v_mov_b32_e32 v6, 3
1854; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1855; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1856; GFX10-NEXT:    v_mov_b32_e32 v7, 4
1857; GFX10-NEXT:    s_mov_b32 s33, s32
1858; GFX10-NEXT:    s_addk_i32 s32, 0x200
1859; GFX10-NEXT:    s_getpc_b64 s[34:35]
1860; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
1861; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
1862; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1863; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1864; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1865; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1866; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1867; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1868; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1869; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1870; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1871; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1872; GFX10-NEXT:    s_waitcnt vmcnt(0)
1873; GFX10-NEXT:    s_setpc_b64 s[30:31]
1874;
1875; GFX11-LABEL: test_call_external_void_func_v4i64:
1876; GFX11:       ; %bb.0:
1877; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1878; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1879; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1880; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
1881; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1882; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2
1883; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1
1884; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
1885; GFX11-NEXT:    v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1886; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
1887; GFX11-NEXT:    s_mov_b32 s33, s32
1888; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1889; GFX11-NEXT:    s_add_i32 s32, s32, 16
1890; GFX11-NEXT:    s_getpc_b64 s[0:1]
1891; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4
1892; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
1893; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1894; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1895; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1896; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1897; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1898; GFX11-NEXT:    s_add_i32 s32, s32, -16
1899; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
1900; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
1901; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
1902; GFX11-NEXT:    s_mov_b32 exec_lo, s0
1903; GFX11-NEXT:    s_waitcnt vmcnt(0)
1904; GFX11-NEXT:    s_setpc_b64 s[30:31]
1905;
1906; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
1907; GFX10-SCRATCH:       ; %bb.0:
1908; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
1910; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1911; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
1912; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1913; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1914; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
1915; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
1916; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
1917; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 1
1918; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 2
1919; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 3
1920; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1921; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
1922; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 4
1923; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
1924; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
1925; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
1926; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4
1927; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
1928; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
1929; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
1930; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
1931; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
1932; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
1933; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
1934; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
1935; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
1936; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
1937; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
1938; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
1939; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
1940  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
1941  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1942  call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val)
1943  ret void
1944}
1945
; Calls @external_void_func_f16 from an amdgpu_gfx function, passing the
; constant half 4.0. For all four check prefixes used by this file the expected
; code materializes the argument as 0x4400 in v0 (the IEEE half bit pattern of
; 4.0) before the s_swappc_b64 call, and saves/restores s30, s31 and s33
; through lanes 0, 1 and 2 of v40 around it.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1946define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
1947; GFX9-LABEL: test_call_external_void_func_f16_imm:
1948; GFX9:       ; %bb.0:
1949; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1950; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1951; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1952; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1953; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
1954; GFX9-NEXT:    s_mov_b32 s33, s32
1955; GFX9-NEXT:    s_addk_i32 s32, 0x400
1956; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1957; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4400
1958; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1959; GFX9-NEXT:    s_getpc_b64 s[34:35]
1960; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
1961; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
1962; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1963; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1964; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1965; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
1966; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
1967; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
1968; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1969; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
1970; GFX9-NEXT:    s_waitcnt vmcnt(0)
1971; GFX9-NEXT:    s_setpc_b64 s[30:31]
1972;
1973; GFX10-LABEL: test_call_external_void_func_f16_imm:
1974; GFX10:       ; %bb.0:
1975; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1976; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1977; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1978; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1979; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1980; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1981; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
1982; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400
1983; GFX10-NEXT:    s_mov_b32 s33, s32
1984; GFX10-NEXT:    s_addk_i32 s32, 0x200
1985; GFX10-NEXT:    s_getpc_b64 s[34:35]
1986; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
1987; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
1988; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
1989; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
1990; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
1991; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
1992; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
1993; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
1994; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
1995; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
1996; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1997; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1998; GFX10-NEXT:    s_mov_b32 exec_lo, s34
1999; GFX10-NEXT:    s_waitcnt vmcnt(0)
2000; GFX10-NEXT:    s_setpc_b64 s[30:31]
2001;
2002; GFX11-LABEL: test_call_external_void_func_f16_imm:
2003; GFX11:       ; %bb.0:
2004; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2005; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2006; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2007; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2008; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2009; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2010; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4400
2011; GFX11-NEXT:    s_mov_b32 s33, s32
2012; GFX11-NEXT:    s_add_i32 s32, s32, 16
2013; GFX11-NEXT:    s_getpc_b64 s[0:1]
2014; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4
2015; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12
2016; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2017; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2018; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2019; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2020; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2021; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2022; GFX11-NEXT:    s_add_i32 s32, s32, -16
2023; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2024; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2025; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2026; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2027; GFX11-NEXT:    s_waitcnt vmcnt(0)
2028; GFX11-NEXT:    s_setpc_b64 s[30:31]
2029;
2030; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
2031; GFX10-SCRATCH:       ; %bb.0:
2032; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2034; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2035; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2036; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2037; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2038; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2039; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x4400
2040; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2041; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2042; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2043; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4
2044; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12
2045; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2046; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2047; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2048; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2049; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2050; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2051; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2052; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2053; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2054; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2055; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2056; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2057; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one call with an immediate half argument, then return.
2058  call amdgpu_gfx void @external_void_func_f16(half 4.0)
2059  ret void
2060}
2061
; Calls @external_void_func_f32 from an amdgpu_gfx function, passing the
; constant float 4.0. For all four check prefixes used by this file the
; expected code loads the argument with `v_mov_b32_e32 v0, 4.0` before the
; s_swappc_b64 call, and saves/restores s30, s31 and s33 through lanes 0, 1
; and 2 of v40 around it.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2062define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
2063; GFX9-LABEL: test_call_external_void_func_f32_imm:
2064; GFX9:       ; %bb.0:
2065; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2066; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2067; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2068; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2069; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2070; GFX9-NEXT:    s_mov_b32 s33, s32
2071; GFX9-NEXT:    s_addk_i32 s32, 0x400
2072; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2073; GFX9-NEXT:    v_mov_b32_e32 v0, 4.0
2074; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2075; GFX9-NEXT:    s_getpc_b64 s[34:35]
2076; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
2077; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
2078; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2079; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2080; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2081; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2082; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2083; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2084; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2085; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2086; GFX9-NEXT:    s_waitcnt vmcnt(0)
2087; GFX9-NEXT:    s_setpc_b64 s[30:31]
2088;
2089; GFX10-LABEL: test_call_external_void_func_f32_imm:
2090; GFX10:       ; %bb.0:
2091; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2092; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2093; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2094; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2095; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2096; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2097; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2098; GFX10-NEXT:    v_mov_b32_e32 v0, 4.0
2099; GFX10-NEXT:    s_mov_b32 s33, s32
2100; GFX10-NEXT:    s_addk_i32 s32, 0x200
2101; GFX10-NEXT:    s_getpc_b64 s[34:35]
2102; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
2103; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
2104; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2105; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2106; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2107; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2108; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2109; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2110; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2111; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2112; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2113; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2114; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2115; GFX10-NEXT:    s_waitcnt vmcnt(0)
2116; GFX10-NEXT:    s_setpc_b64 s[30:31]
2117;
2118; GFX11-LABEL: test_call_external_void_func_f32_imm:
2119; GFX11:       ; %bb.0:
2120; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2121; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2122; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2123; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2124; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2125; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2126; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
2127; GFX11-NEXT:    s_mov_b32 s33, s32
2128; GFX11-NEXT:    s_add_i32 s32, s32, 16
2129; GFX11-NEXT:    s_getpc_b64 s[0:1]
2130; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4
2131; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12
2132; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2133; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2134; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2135; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2136; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2137; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2138; GFX11-NEXT:    s_add_i32 s32, s32, -16
2139; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2140; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2141; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2142; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2143; GFX11-NEXT:    s_waitcnt vmcnt(0)
2144; GFX11-NEXT:    s_setpc_b64 s[30:31]
2145;
2146; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
2147; GFX10-SCRATCH:       ; %bb.0:
2148; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2150; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2151; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2152; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2153; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2154; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2155; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 4.0
2156; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2157; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2158; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2159; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4
2160; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12
2161; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2162; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2163; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2164; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2165; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2166; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2167; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2168; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2169; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2170; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2171; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2172; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2173; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one call with an immediate float argument, then return.
2174  call amdgpu_gfx void @external_void_func_f32(float 4.0)
2175  ret void
2176}
2177
; Calls @external_void_func_v2f32 from an amdgpu_gfx function, passing the
; constant vector <1.0, 2.0>. The expected code places element 0 in v0 and
; element 1 in v1 before the s_swappc_b64 call: the gfx900 and both gfx1010
; configurations use two v_mov_b32_e32 instructions, while the gfx1100
; configuration expects a single v_dual_mov_b32 covering both registers.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2178define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
2179; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
2180; GFX9:       ; %bb.0:
2181; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2183; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2184; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2185; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2186; GFX9-NEXT:    s_mov_b32 s33, s32
2187; GFX9-NEXT:    s_addk_i32 s32, 0x400
2188; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2189; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
2190; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2191; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2192; GFX9-NEXT:    s_getpc_b64 s[34:35]
2193; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
2194; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
2195; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2196; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2197; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2198; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2199; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2200; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2201; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2202; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2203; GFX9-NEXT:    s_waitcnt vmcnt(0)
2204; GFX9-NEXT:    s_setpc_b64 s[30:31]
2205;
2206; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
2207; GFX10:       ; %bb.0:
2208; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2209; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2210; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2211; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2212; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2213; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2214; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2215; GFX10-NEXT:    v_mov_b32_e32 v0, 1.0
2216; GFX10-NEXT:    v_mov_b32_e32 v1, 2.0
2217; GFX10-NEXT:    s_mov_b32 s33, s32
2218; GFX10-NEXT:    s_addk_i32 s32, 0x200
2219; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2220; GFX10-NEXT:    s_getpc_b64 s[34:35]
2221; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
2222; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
2223; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2224; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2225; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2226; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2227; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2228; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2229; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2230; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2231; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2232; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2233; GFX10-NEXT:    s_waitcnt vmcnt(0)
2234; GFX10-NEXT:    s_setpc_b64 s[30:31]
2235;
2236; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
2237; GFX11:       ; %bb.0:
2238; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2240; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2241; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2242; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2243; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2244; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2245; GFX11-NEXT:    s_mov_b32 s33, s32
2246; GFX11-NEXT:    s_add_i32 s32, s32, 16
2247; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2248; GFX11-NEXT:    s_getpc_b64 s[0:1]
2249; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4
2250; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
2251; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2252; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2253; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2254; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2255; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2256; GFX11-NEXT:    s_add_i32 s32, s32, -16
2257; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2258; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2259; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2260; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2261; GFX11-NEXT:    s_waitcnt vmcnt(0)
2262; GFX11-NEXT:    s_setpc_b64 s[30:31]
2263;
2264; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
2265; GFX10-SCRATCH:       ; %bb.0:
2266; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2267; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2268; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2269; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2270; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2271; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2272; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2273; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1.0
2274; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2.0
2275; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2276; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2277; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2278; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2279; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4
2280; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
2281; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2282; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2283; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2284; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2285; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2286; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2287; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2288; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2289; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2290; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2291; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2292; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one call with a constant <2 x float> argument, then return.
2293  call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
2294  ret void
2295}
2296
; Calls @external_void_func_v3f32 from an amdgpu_gfx function, passing the
; constant vector <1.0, 2.0, 4.0>. The expected code places the three elements
; in v0, v1 and v2 before the s_swappc_b64 call: the gfx900 and both gfx1010
; configurations use three v_mov_b32_e32 instructions, while the gfx1100
; configuration expects a v_dual_mov_b32 for v0/v1 followed by a separate
; v_mov_b32_e32 for v2.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2297define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
2298; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
2299; GFX9:       ; %bb.0:
2300; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2301; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2302; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2303; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2304; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2305; GFX9-NEXT:    s_mov_b32 s33, s32
2306; GFX9-NEXT:    s_addk_i32 s32, 0x400
2307; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2308; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
2309; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2310; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
2311; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2312; GFX9-NEXT:    s_getpc_b64 s[34:35]
2313; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
2314; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
2315; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2316; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2317; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2318; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2319; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2320; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2321; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2322; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2323; GFX9-NEXT:    s_waitcnt vmcnt(0)
2324; GFX9-NEXT:    s_setpc_b64 s[30:31]
2325;
2326; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
2327; GFX10:       ; %bb.0:
2328; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2329; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2330; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2331; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2332; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2333; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2334; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2335; GFX10-NEXT:    v_mov_b32_e32 v0, 1.0
2336; GFX10-NEXT:    v_mov_b32_e32 v1, 2.0
2337; GFX10-NEXT:    v_mov_b32_e32 v2, 4.0
2338; GFX10-NEXT:    s_mov_b32 s33, s32
2339; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2340; GFX10-NEXT:    s_addk_i32 s32, 0x200
2341; GFX10-NEXT:    s_getpc_b64 s[34:35]
2342; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
2343; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
2344; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2345; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2346; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2347; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2348; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2349; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2350; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2351; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2352; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2353; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2354; GFX10-NEXT:    s_waitcnt vmcnt(0)
2355; GFX10-NEXT:    s_setpc_b64 s[30:31]
2356;
2357; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
2358; GFX11:       ; %bb.0:
2359; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2360; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2361; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2362; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2363; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2364; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2365; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2366; GFX11-NEXT:    v_mov_b32_e32 v2, 4.0
2367; GFX11-NEXT:    s_mov_b32 s33, s32
2368; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2369; GFX11-NEXT:    s_add_i32 s32, s32, 16
2370; GFX11-NEXT:    s_getpc_b64 s[0:1]
2371; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4
2372; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
2373; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2374; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2375; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2376; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2377; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2378; GFX11-NEXT:    s_add_i32 s32, s32, -16
2379; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2380; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2381; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2382; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2383; GFX11-NEXT:    s_waitcnt vmcnt(0)
2384; GFX11-NEXT:    s_setpc_b64 s[30:31]
2385;
2386; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
2387; GFX10-SCRATCH:       ; %bb.0:
2388; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2389; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2390; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2391; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2392; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2393; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2394; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2395; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1.0
2396; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2.0
2397; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 4.0
2398; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2399; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2400; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2401; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2402; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4
2403; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
2404; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2405; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2406; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2407; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2408; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2409; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2410; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2411; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2412; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2413; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2414; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2415; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one call with a constant <3 x float> argument, then return.
2416  call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
2417  ret void
2418}
2419
2420define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
2421; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2422; GFX9:       ; %bb.0:
2423; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2424; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2425; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2426; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2427; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2428; GFX9-NEXT:    s_mov_b32 s33, s32
2429; GFX9-NEXT:    s_addk_i32 s32, 0x400
2430; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2431; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
2432; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2433; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
2434; GFX9-NEXT:    v_mov_b32_e32 v3, -1.0
2435; GFX9-NEXT:    v_mov_b32_e32 v4, 0.5
2436; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2437; GFX9-NEXT:    s_getpc_b64 s[34:35]
2438; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
2439; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
2440; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2441; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2442; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2443; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2444; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2445; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2446; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2447; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2448; GFX9-NEXT:    s_waitcnt vmcnt(0)
2449; GFX9-NEXT:    s_setpc_b64 s[30:31]
2450;
2451; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
2452; GFX10:       ; %bb.0:
2453; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2454; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2455; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2456; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2457; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2458; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2459; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2460; GFX10-NEXT:    v_mov_b32_e32 v0, 1.0
2461; GFX10-NEXT:    v_mov_b32_e32 v1, 2.0
2462; GFX10-NEXT:    v_mov_b32_e32 v2, 4.0
2463; GFX10-NEXT:    v_mov_b32_e32 v3, -1.0
2464; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2465; GFX10-NEXT:    v_mov_b32_e32 v4, 0.5
2466; GFX10-NEXT:    s_mov_b32 s33, s32
2467; GFX10-NEXT:    s_addk_i32 s32, 0x200
2468; GFX10-NEXT:    s_getpc_b64 s[34:35]
2469; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
2470; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
2471; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2472; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2473; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2474; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2475; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2476; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2477; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2478; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2479; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2480; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2481; GFX10-NEXT:    s_waitcnt vmcnt(0)
2482; GFX10-NEXT:    s_setpc_b64 s[30:31]
2483;
2484; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2485; GFX11:       ; %bb.0:
2486; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2487; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2488; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2489; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2490; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2491; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2492; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2493; GFX11-NEXT:    v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2494; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2495; GFX11-NEXT:    v_mov_b32_e32 v4, 0.5
2496; GFX11-NEXT:    s_mov_b32 s33, s32
2497; GFX11-NEXT:    s_add_i32 s32, s32, 16
2498; GFX11-NEXT:    s_getpc_b64 s[0:1]
2499; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4
2500; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
2501; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2502; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2503; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2504; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2505; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2506; GFX11-NEXT:    s_add_i32 s32, s32, -16
2507; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2508; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2509; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2510; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2511; GFX11-NEXT:    s_waitcnt vmcnt(0)
2512; GFX11-NEXT:    s_setpc_b64 s[30:31]
2513;
2514; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
2515; GFX10-SCRATCH:       ; %bb.0:
2516; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2517; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2518; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2519; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2520; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2521; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2522; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2523; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1.0
2524; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2.0
2525; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 4.0
2526; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, -1.0
2527; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2528; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 0.5
2529; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2530; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2531; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2532; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4
2533; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
2534; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2535; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2536; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2537; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2538; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2539; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2540; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2541; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2542; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2543; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2544; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2545; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
2546  call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
2547  ret void
2548}
2549
; Calls @external_void_func_f64 with the constant double 4.0 from an amdgpu_gfx
; function. In every configuration the expected assembly materializes the
; argument with two 32-bit moves: v0 = 0 and v1 = 0x40100000, i.e. the low and
; high halves of the 64-bit pattern 0x4010000000000000 (4.0 as a double).
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
2550define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
2551; GFX9-LABEL: test_call_external_void_func_f64_imm:
2552; GFX9:       ; %bb.0:
2553; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2554; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2555; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2556; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2557; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2558; GFX9-NEXT:    s_mov_b32 s33, s32
2559; GFX9-NEXT:    s_addk_i32 s32, 0x400
2560; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2561; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2562; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40100000
2563; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2564; GFX9-NEXT:    s_getpc_b64 s[34:35]
2565; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
2566; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
2567; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2568; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2569; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2570; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2571; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2572; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2573; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2574; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2575; GFX9-NEXT:    s_waitcnt vmcnt(0)
2576; GFX9-NEXT:    s_setpc_b64 s[30:31]
2577;
2578; GFX10-LABEL: test_call_external_void_func_f64_imm:
2579; GFX10:       ; %bb.0:
2580; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2581; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2582; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2583; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2584; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2585; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2586; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2587; GFX10-NEXT:    v_mov_b32_e32 v0, 0
2588; GFX10-NEXT:    v_mov_b32_e32 v1, 0x40100000
2589; GFX10-NEXT:    s_mov_b32 s33, s32
2590; GFX10-NEXT:    s_addk_i32 s32, 0x200
2591; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2592; GFX10-NEXT:    s_getpc_b64 s[34:35]
2593; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
2594; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
2595; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2596; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2597; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2598; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2599; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2600; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2601; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2602; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2603; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2604; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2605; GFX10-NEXT:    s_waitcnt vmcnt(0)
2606; GFX10-NEXT:    s_setpc_b64 s[30:31]
2607;
2608; GFX11-LABEL: test_call_external_void_func_f64_imm:
2609; GFX11:       ; %bb.0:
2610; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2611; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2612; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2613; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2614; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2615; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2616; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2617; GFX11-NEXT:    s_mov_b32 s33, s32
2618; GFX11-NEXT:    s_add_i32 s32, s32, 16
2619; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2620; GFX11-NEXT:    s_getpc_b64 s[0:1]
2621; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4
2622; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
2623; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2624; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2625; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2626; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2627; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2628; GFX11-NEXT:    s_add_i32 s32, s32, -16
2629; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2630; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2631; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2632; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2633; GFX11-NEXT:    s_waitcnt vmcnt(0)
2634; GFX11-NEXT:    s_setpc_b64 s[30:31]
2635;
2636; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
2637; GFX10-SCRATCH:       ; %bb.0:
2638; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2639; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2640; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2641; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2642; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2643; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2644; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2645; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
2646; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0x40100000
2647; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2648; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2649; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2650; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2651; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4
2652; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
2653; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2654; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2655; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2656; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2657; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2658; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2659; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2660; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2661; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2662; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2663; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2664; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The only IR under test: one call with a 64-bit floating-point immediate.
2665  call amdgpu_gfx void @external_void_func_f64(double 4.0)
2666  ret void
2667}
2668
; Calls @external_void_func_v2f64 with the constant vector <2.0, 4.0> from an
; amdgpu_gfx function. The expected assembly fills v0..v3 with four 32-bit
; moves, one low/high pair per double: (0, 2.0) for element 0 and
; (0, 0x40100000) for element 1. The operand printed as "2.0" is the 32-bit
; pattern 0x40000000, which is also the high half of 2.0 as a double.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
2669define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
2670; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2671; GFX9:       ; %bb.0:
2672; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2674; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2675; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2676; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2677; GFX9-NEXT:    s_mov_b32 s33, s32
2678; GFX9-NEXT:    s_addk_i32 s32, 0x400
2679; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2680; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2681; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2682; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2683; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
2684; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2685; GFX9-NEXT:    s_getpc_b64 s[34:35]
2686; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
2687; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
2688; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2689; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2690; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2691; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2692; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2693; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2694; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2695; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2696; GFX9-NEXT:    s_waitcnt vmcnt(0)
2697; GFX9-NEXT:    s_setpc_b64 s[30:31]
2698;
2699; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
2700; GFX10:       ; %bb.0:
2701; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2703; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2704; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2705; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2706; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2707; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2708; GFX10-NEXT:    v_mov_b32_e32 v0, 0
2709; GFX10-NEXT:    v_mov_b32_e32 v1, 2.0
2710; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2711; GFX10-NEXT:    v_mov_b32_e32 v3, 0x40100000
2712; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2713; GFX10-NEXT:    s_mov_b32 s33, s32
2714; GFX10-NEXT:    s_addk_i32 s32, 0x200
2715; GFX10-NEXT:    s_getpc_b64 s[34:35]
2716; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
2717; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
2718; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2719; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2720; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2721; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2722; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2723; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2724; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2725; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2726; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2727; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2728; GFX10-NEXT:    s_waitcnt vmcnt(0)
2729; GFX10-NEXT:    s_setpc_b64 s[30:31]
2730;
2731; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2732; GFX11:       ; %bb.0:
2733; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2735; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2736; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2737; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2738; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2739; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2740; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2741; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2742; GFX11-NEXT:    s_mov_b32 s33, s32
2743; GFX11-NEXT:    s_add_i32 s32, s32, 16
2744; GFX11-NEXT:    s_getpc_b64 s[0:1]
2745; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4
2746; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
2747; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2748; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2749; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2750; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2751; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2752; GFX11-NEXT:    s_add_i32 s32, s32, -16
2753; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2754; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2755; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2756; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2757; GFX11-NEXT:    s_waitcnt vmcnt(0)
2758; GFX11-NEXT:    s_setpc_b64 s[30:31]
2759;
2760; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
2761; GFX10-SCRATCH:       ; %bb.0:
2762; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2763; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2764; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2765; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2766; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2767; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2768; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2769; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
2770; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2.0
2771; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
2772; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 0x40100000
2773; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2774; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2775; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2776; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2777; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4
2778; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
2779; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2780; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2781; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2782; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2783; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2784; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2785; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2786; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2787; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2788; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2789; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2790; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The only IR under test: one call with a two-element double vector immediate.
2791  call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
2792  ret void
2793}
2794
; Calls @external_void_func_v3f64 with the constant vector <2.0, 4.0, 8.0> from
; an amdgpu_gfx function. The expected assembly fills v0..v5 with six 32-bit
; moves, one low/high pair per double: (0, 2.0), (0, 0x40100000) and
; (0, 0x40200000). The operand printed as "2.0" is the 32-bit pattern
; 0x40000000, which is also the high half of 2.0 as a double.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
2795define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
2796; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2797; GFX9:       ; %bb.0:
2798; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2799; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2800; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2801; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2802; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2803; GFX9-NEXT:    s_mov_b32 s33, s32
2804; GFX9-NEXT:    s_addk_i32 s32, 0x400
2805; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2806; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2807; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2808; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2809; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
2810; GFX9-NEXT:    v_mov_b32_e32 v4, 0
2811; GFX9-NEXT:    v_mov_b32_e32 v5, 0x40200000
2812; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2813; GFX9-NEXT:    s_getpc_b64 s[34:35]
2814; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
2815; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
2816; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2817; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2818; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2819; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2820; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2821; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2822; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2823; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2824; GFX9-NEXT:    s_waitcnt vmcnt(0)
2825; GFX9-NEXT:    s_setpc_b64 s[30:31]
2826;
2827; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
2828; GFX10:       ; %bb.0:
2829; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2831; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2832; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2833; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2834; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2835; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2836; GFX10-NEXT:    v_mov_b32_e32 v0, 0
2837; GFX10-NEXT:    v_mov_b32_e32 v1, 2.0
2838; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2839; GFX10-NEXT:    v_mov_b32_e32 v3, 0x40100000
2840; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2841; GFX10-NEXT:    v_mov_b32_e32 v4, 0
2842; GFX10-NEXT:    v_mov_b32_e32 v5, 0x40200000
2843; GFX10-NEXT:    s_mov_b32 s33, s32
2844; GFX10-NEXT:    s_addk_i32 s32, 0x200
2845; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2846; GFX10-NEXT:    s_getpc_b64 s[34:35]
2847; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
2848; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
2849; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2850; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2851; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2852; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2853; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2854; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2855; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2856; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2857; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2858; GFX10-NEXT:    s_waitcnt vmcnt(0)
2859; GFX10-NEXT:    s_setpc_b64 s[30:31]
2860;
2861; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2862; GFX11:       ; %bb.0:
2863; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2864; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2865; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2866; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2867; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2868; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2869; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2870; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2871; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2872; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
2873; GFX11-NEXT:    s_mov_b32 s33, s32
2874; GFX11-NEXT:    s_add_i32 s32, s32, 16
2875; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2876; GFX11-NEXT:    s_getpc_b64 s[0:1]
2877; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
2878; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
2879; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2880; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2881; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2882; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2883; GFX11-NEXT:    s_add_i32 s32, s32, -16
2884; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
2885; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2886; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
2887; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2888; GFX11-NEXT:    s_waitcnt vmcnt(0)
2889; GFX11-NEXT:    s_setpc_b64 s[30:31]
2890;
2891; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
2892; GFX10-SCRATCH:       ; %bb.0:
2893; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2894; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
2895; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2896; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
2897; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2898; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2899; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
2900; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
2901; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2.0
2902; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
2903; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 0x40100000
2904; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
2905; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 0
2906; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 0x40200000
2907; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
2908; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
2909; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
2910; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
2911; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
2912; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
2913; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
2914; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
2915; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
2916; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
2917; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
2918; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
2919; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
2920; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
2921; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
2922; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
2923; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The only IR under test: one call with a three-element double vector immediate.
2924  call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
2925  ret void
2926}
2927
; Loads a <2 x i16> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v2i16 from an amdgpu_gfx function. In every configuration
; the expected assembly performs a single 32-bit global load into v0 and does
; not write v0 again before the call.
; NOTE(review): the load address is undef, so the v[0:1] address operand in the
; expected output presumably carries no meaning; confirm before relying on it.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
2928define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
2929; GFX9-LABEL: test_call_external_void_func_v2i16:
2930; GFX9:       ; %bb.0:
2931; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2932; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2933; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2934; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2935; GFX9-NEXT:    global_load_dword v0, v[0:1], off
2936; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
2937; GFX9-NEXT:    s_mov_b32 s33, s32
2938; GFX9-NEXT:    s_addk_i32 s32, 0x400
2939; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2940; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2941; GFX9-NEXT:    s_getpc_b64 s[34:35]
2942; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
2943; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
2944; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2945; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2946; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2947; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
2948; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
2949; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
2950; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2951; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
2952; GFX9-NEXT:    s_waitcnt vmcnt(0)
2953; GFX9-NEXT:    s_setpc_b64 s[30:31]
2954;
2955; GFX10-LABEL: test_call_external_void_func_v2i16:
2956; GFX10:       ; %bb.0:
2957; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2959; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2960; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
2961; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2962; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2963; GFX10-NEXT:    global_load_dword v0, v[0:1], off
2964; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
2965; GFX10-NEXT:    s_mov_b32 s33, s32
2966; GFX10-NEXT:    s_addk_i32 s32, 0x200
2967; GFX10-NEXT:    s_getpc_b64 s[34:35]
2968; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
2969; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
2970; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
2971; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
2972; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
2973; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
2974; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
2975; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
2976; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
2977; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
2978; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2979; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2980; GFX10-NEXT:    s_mov_b32 exec_lo, s34
2981; GFX10-NEXT:    s_waitcnt vmcnt(0)
2982; GFX10-NEXT:    s_setpc_b64 s[30:31]
2983;
2984; GFX11-LABEL: test_call_external_void_func_v2i16:
2985; GFX11:       ; %bb.0:
2986; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2987; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2988; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
2989; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
2990; GFX11-NEXT:    s_mov_b32 exec_lo, s0
2991; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
2992; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
2993; GFX11-NEXT:    s_mov_b32 s33, s32
2994; GFX11-NEXT:    s_add_i32 s32, s32, 16
2995; GFX11-NEXT:    s_getpc_b64 s[0:1]
2996; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4
2997; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12
2998; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2999; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3000; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3001; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3002; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3003; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3004; GFX11-NEXT:    s_add_i32 s32, s32, -16
3005; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3006; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3007; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3008; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3009; GFX11-NEXT:    s_waitcnt vmcnt(0)
3010; GFX11-NEXT:    s_setpc_b64 s[30:31]
3011;
3012; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
3013; GFX10-SCRATCH:       ; %bb.0:
3014; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3015; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3016; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3017; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3018; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3019; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3020; GFX10-SCRATCH-NEXT:    global_load_dword v0, v[0:1], off
3021; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3022; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3023; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3024; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3025; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4
3026; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12
3027; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3028; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3029; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3030; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3031; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3032; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3033; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3034; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3035; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3036; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3037; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3038; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3039; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; Non-constant argument: the value comes from a global-address-space load
  ; rather than an immediate.
3040  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
3041  call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val)
3042  ret void
3043}
3044
; Loads a <3 x i16> from an undef addrspace(1) pointer and passes it to
; @external_void_func_v3i16 from an amdgpu_gfx function. In every configuration
; the expected assembly performs a single 64-bit global load into v[0:1] and
; does not write v0 or v1 again before the call.
; NOTE(review): the load address is undef, so the v[0:1] address operand in the
; expected output presumably carries no meaning; confirm before relying on it.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
3045define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
3046; GFX9-LABEL: test_call_external_void_func_v3i16:
3047; GFX9:       ; %bb.0:
3048; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3049; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3050; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3051; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3052; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3053; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3054; GFX9-NEXT:    s_mov_b32 s33, s32
3055; GFX9-NEXT:    s_addk_i32 s32, 0x400
3056; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3057; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3058; GFX9-NEXT:    s_getpc_b64 s[34:35]
3059; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
3060; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
3061; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3062; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3063; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3064; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3065; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3066; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3067; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3068; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3069; GFX9-NEXT:    s_waitcnt vmcnt(0)
3070; GFX9-NEXT:    s_setpc_b64 s[30:31]
3071;
3072; GFX10-LABEL: test_call_external_void_func_v3i16:
3073; GFX10:       ; %bb.0:
3074; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3075; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3076; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3077; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3078; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3079; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3080; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3081; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3082; GFX10-NEXT:    s_mov_b32 s33, s32
3083; GFX10-NEXT:    s_addk_i32 s32, 0x200
3084; GFX10-NEXT:    s_getpc_b64 s[34:35]
3085; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
3086; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
3087; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3088; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3089; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3090; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3091; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3092; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3093; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3094; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3095; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3096; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3097; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3098; GFX10-NEXT:    s_waitcnt vmcnt(0)
3099; GFX10-NEXT:    s_setpc_b64 s[30:31]
3100;
3101; GFX11-LABEL: test_call_external_void_func_v3i16:
3102; GFX11:       ; %bb.0:
3103; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3104; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3105; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3106; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3107; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3108; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
3109; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3110; GFX11-NEXT:    s_mov_b32 s33, s32
3111; GFX11-NEXT:    s_add_i32 s32, s32, 16
3112; GFX11-NEXT:    s_getpc_b64 s[0:1]
3113; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
3114; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
3115; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3116; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3117; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3118; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3119; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3120; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3121; GFX11-NEXT:    s_add_i32 s32, s32, -16
3122; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3123; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3124; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3125; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3126; GFX11-NEXT:    s_waitcnt vmcnt(0)
3127; GFX11-NEXT:    s_setpc_b64 s[30:31]
3128;
3129; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
3130; GFX10-SCRATCH:       ; %bb.0:
3131; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3132; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3133; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3134; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3135; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3136; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3137; GFX10-SCRATCH-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3138; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3139; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3140; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3141; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3142; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
3143; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
3144; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3145; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3146; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3147; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3148; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3149; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3150; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3151; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3152; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3153; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3154; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3155; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3156; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; Non-constant argument: the value comes from a global-address-space load
  ; rather than an immediate.
3157  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
3158  call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val)
3159  ret void
3160}
3161
; Calls @external_void_func_v3f16 (amdgpu_gfx calling convention) with a
; <3 x half> value loaded from an undef addrspace(1) pointer.
; In the expected output for every run line the argument is fetched with a
; single 64-bit global load into v[0:1] before the call, and v40 is
; spilled/reloaded around the body while its lanes 0-2 hold s30, s31 and s33.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3162define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
3163; GFX9-LABEL: test_call_external_void_func_v3f16:
3164; GFX9:       ; %bb.0:
3165; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3166; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3167; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3168; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3169; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3170; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3171; GFX9-NEXT:    s_mov_b32 s33, s32
3172; GFX9-NEXT:    s_addk_i32 s32, 0x400
3173; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3174; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3175; GFX9-NEXT:    s_getpc_b64 s[34:35]
3176; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
3177; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
3178; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3179; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3180; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3181; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3182; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3183; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3184; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3185; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3186; GFX9-NEXT:    s_waitcnt vmcnt(0)
3187; GFX9-NEXT:    s_setpc_b64 s[30:31]
3188;
3189; GFX10-LABEL: test_call_external_void_func_v3f16:
3190; GFX10:       ; %bb.0:
3191; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3192; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3193; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3194; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3195; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3196; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3197; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3198; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3199; GFX10-NEXT:    s_mov_b32 s33, s32
3200; GFX10-NEXT:    s_addk_i32 s32, 0x200
3201; GFX10-NEXT:    s_getpc_b64 s[34:35]
3202; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
3203; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
3204; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3205; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3206; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3207; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3208; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3209; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3210; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3211; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3212; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3213; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3214; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3215; GFX10-NEXT:    s_waitcnt vmcnt(0)
3216; GFX10-NEXT:    s_setpc_b64 s[30:31]
3217;
3218; GFX11-LABEL: test_call_external_void_func_v3f16:
3219; GFX11:       ; %bb.0:
3220; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3221; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3222; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3223; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3224; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3225; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
3226; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3227; GFX11-NEXT:    s_mov_b32 s33, s32
3228; GFX11-NEXT:    s_add_i32 s32, s32, 16
3229; GFX11-NEXT:    s_getpc_b64 s[0:1]
3230; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
3231; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
3232; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3233; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3234; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3235; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3236; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3237; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3238; GFX11-NEXT:    s_add_i32 s32, s32, -16
3239; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3240; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3241; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3242; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3243; GFX11-NEXT:    s_waitcnt vmcnt(0)
3244; GFX11-NEXT:    s_setpc_b64 s[30:31]
3245;
3246; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
3247; GFX10-SCRATCH:       ; %bb.0:
3248; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3250; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3251; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3252; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3253; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3254; GFX10-SCRATCH-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3255; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3256; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3257; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3258; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3259; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
3260; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
3261; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3262; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3263; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3264; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3265; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3266; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3267; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3268; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3269; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3270; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3271; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3272; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3273; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3274  %val = load <3 x half>, <3 x half> addrspace(1)* undef
3275  call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val)
3276  ret void
3277}
3278
; Calls @external_void_func_v3i16 (amdgpu_gfx calling convention) with the
; constant vector <i16 1, i16 2, i16 3>.
; The expected output materialises the argument as v0 = 0x20001 and v1 = 3
; before the call; the gfx1100 output does this with one v_dual_mov_b32,
; the other run lines with two v_mov_b32_e32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3279define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
3280; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
3281; GFX9:       ; %bb.0:
3282; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3283; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3284; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3285; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3286; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3287; GFX9-NEXT:    s_mov_b32 s33, s32
3288; GFX9-NEXT:    s_addk_i32 s32, 0x400
3289; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3290; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
3291; GFX9-NEXT:    v_mov_b32_e32 v1, 3
3292; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3293; GFX9-NEXT:    s_getpc_b64 s[34:35]
3294; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
3295; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
3296; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3297; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3298; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3299; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3300; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3301; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3302; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3303; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3304; GFX9-NEXT:    s_waitcnt vmcnt(0)
3305; GFX9-NEXT:    s_setpc_b64 s[30:31]
3306;
3307; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
3308; GFX10:       ; %bb.0:
3309; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3310; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3311; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3312; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3313; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3314; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3315; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3316; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
3317; GFX10-NEXT:    v_mov_b32_e32 v1, 3
3318; GFX10-NEXT:    s_mov_b32 s33, s32
3319; GFX10-NEXT:    s_addk_i32 s32, 0x200
3320; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3321; GFX10-NEXT:    s_getpc_b64 s[34:35]
3322; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
3323; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
3324; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3325; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3326; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3327; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3328; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3329; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3330; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3331; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3332; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3333; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3334; GFX10-NEXT:    s_waitcnt vmcnt(0)
3335; GFX10-NEXT:    s_setpc_b64 s[30:31]
3336;
3337; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
3338; GFX11:       ; %bb.0:
3339; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3340; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3341; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3342; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3343; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3344; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3345; GFX11-NEXT:    v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
3346; GFX11-NEXT:    s_mov_b32 s33, s32
3347; GFX11-NEXT:    s_add_i32 s32, s32, 16
3348; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3349; GFX11-NEXT:    s_getpc_b64 s[0:1]
3350; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
3351; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
3352; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3353; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3354; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3355; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3356; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3357; GFX11-NEXT:    s_add_i32 s32, s32, -16
3358; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3359; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3360; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3361; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3362; GFX11-NEXT:    s_waitcnt vmcnt(0)
3363; GFX11-NEXT:    s_setpc_b64 s[30:31]
3364;
3365; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
3366; GFX10-SCRATCH:       ; %bb.0:
3367; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3368; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3369; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3370; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3371; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3372; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3373; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3374; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x20001
3375; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 3
3376; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3377; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3378; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3379; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3380; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4
3381; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
3382; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3383; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3384; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3385; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3386; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3387; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3388; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3389; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3390; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3391; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3392; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3393; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3394  call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
3395  ret void
3396}
3397
; Calls @external_void_func_v3f16 (amdgpu_gfx calling convention) with the
; constant vector <half 1.0, half 2.0, half 4.0>.
; The expected output materialises the argument as v0 = 0x40003c00 and
; v1 = 0x4400 before the call, using two separate v_mov_b32_e32 instructions
; on every run line (including gfx1100).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3398define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
3399; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
3400; GFX9:       ; %bb.0:
3401; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3402; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3403; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3404; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3405; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3406; GFX9-NEXT:    s_mov_b32 s33, s32
3407; GFX9-NEXT:    s_addk_i32 s32, 0x400
3408; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3409; GFX9-NEXT:    v_mov_b32_e32 v0, 0x40003c00
3410; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4400
3411; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3412; GFX9-NEXT:    s_getpc_b64 s[34:35]
3413; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
3414; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
3415; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3416; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3417; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3418; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3419; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3420; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3421; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3422; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3423; GFX9-NEXT:    s_waitcnt vmcnt(0)
3424; GFX9-NEXT:    s_setpc_b64 s[30:31]
3425;
3426; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
3427; GFX10:       ; %bb.0:
3428; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3430; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3431; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3432; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3433; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3434; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3435; GFX10-NEXT:    v_mov_b32_e32 v0, 0x40003c00
3436; GFX10-NEXT:    v_mov_b32_e32 v1, 0x4400
3437; GFX10-NEXT:    s_mov_b32 s33, s32
3438; GFX10-NEXT:    s_addk_i32 s32, 0x200
3439; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3440; GFX10-NEXT:    s_getpc_b64 s[34:35]
3441; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
3442; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
3443; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3444; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3445; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3446; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3447; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3448; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3449; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3450; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3451; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3452; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3453; GFX10-NEXT:    s_waitcnt vmcnt(0)
3454; GFX10-NEXT:    s_setpc_b64 s[30:31]
3455;
3456; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
3457; GFX11:       ; %bb.0:
3458; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3459; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3460; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3461; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3462; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3463; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3464; GFX11-NEXT:    v_mov_b32_e32 v0, 0x40003c00
3465; GFX11-NEXT:    v_mov_b32_e32 v1, 0x4400
3466; GFX11-NEXT:    s_mov_b32 s33, s32
3467; GFX11-NEXT:    s_add_i32 s32, s32, 16
3468; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3469; GFX11-NEXT:    s_getpc_b64 s[0:1]
3470; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
3471; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
3472; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3473; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3474; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3475; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3476; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3477; GFX11-NEXT:    s_add_i32 s32, s32, -16
3478; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3479; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3480; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3481; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3482; GFX11-NEXT:    s_waitcnt vmcnt(0)
3483; GFX11-NEXT:    s_setpc_b64 s[30:31]
3484;
3485; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
3486; GFX10-SCRATCH:       ; %bb.0:
3487; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3488; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3489; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3490; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3491; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3492; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3493; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3494; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x40003c00
3495; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0x4400
3496; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3497; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3498; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3499; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3500; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4
3501; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
3502; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3503; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3504; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3505; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3506; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3507; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3508; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3509; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3510; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3511; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3512; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3513; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3514  call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
3515  ret void
3516}
3517
; Calls @external_void_func_v4i16 (amdgpu_gfx calling convention) with a
; <4 x i16> value loaded from an undef addrspace(1) pointer.
; In the expected output for every run line the argument is fetched with a
; single 64-bit global load into v[0:1] before the call, and v40 is
; spilled/reloaded around the body while its lanes 0-2 hold s30, s31 and s33.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3518define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
3519; GFX9-LABEL: test_call_external_void_func_v4i16:
3520; GFX9:       ; %bb.0:
3521; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3522; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3523; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3524; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3525; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3526; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3527; GFX9-NEXT:    s_mov_b32 s33, s32
3528; GFX9-NEXT:    s_addk_i32 s32, 0x400
3529; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3530; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3531; GFX9-NEXT:    s_getpc_b64 s[34:35]
3532; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
3533; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
3534; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3535; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3536; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3537; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3538; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3539; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3540; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3541; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3542; GFX9-NEXT:    s_waitcnt vmcnt(0)
3543; GFX9-NEXT:    s_setpc_b64 s[30:31]
3544;
3545; GFX10-LABEL: test_call_external_void_func_v4i16:
3546; GFX10:       ; %bb.0:
3547; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3548; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3549; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3550; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3551; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3552; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3553; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3554; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3555; GFX10-NEXT:    s_mov_b32 s33, s32
3556; GFX10-NEXT:    s_addk_i32 s32, 0x200
3557; GFX10-NEXT:    s_getpc_b64 s[34:35]
3558; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
3559; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
3560; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3561; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3562; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3563; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3564; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3565; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3566; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3567; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3568; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3569; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3570; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3571; GFX10-NEXT:    s_waitcnt vmcnt(0)
3572; GFX10-NEXT:    s_setpc_b64 s[30:31]
3573;
3574; GFX11-LABEL: test_call_external_void_func_v4i16:
3575; GFX11:       ; %bb.0:
3576; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3577; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3578; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3579; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3580; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3581; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
3582; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3583; GFX11-NEXT:    s_mov_b32 s33, s32
3584; GFX11-NEXT:    s_add_i32 s32, s32, 16
3585; GFX11-NEXT:    s_getpc_b64 s[0:1]
3586; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
3587; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
3588; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3589; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3590; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3591; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3592; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3593; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3594; GFX11-NEXT:    s_add_i32 s32, s32, -16
3595; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3596; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3597; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3598; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3599; GFX11-NEXT:    s_waitcnt vmcnt(0)
3600; GFX11-NEXT:    s_setpc_b64 s[30:31]
3601;
3602; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
3603; GFX10-SCRATCH:       ; %bb.0:
3604; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3605; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3606; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3607; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3608; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3609; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3610; GFX10-SCRATCH-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3611; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3612; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3613; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3614; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3615; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
3616; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
3617; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3618; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3619; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3620; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3621; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3622; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3623; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3624; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3625; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3626; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3627; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3628; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3629; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3630  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
3631  call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val)
3632  ret void
3633}
3634
; Calls @external_void_func_v4i16 (amdgpu_gfx calling convention) with the
; constant vector <i16 1, i16 2, i16 3, i16 4>.
; The expected output materialises the argument as v0 = 0x20001 and
; v1 = 0x40003 before the call, using two separate v_mov_b32_e32 instructions
; on every run line (including gfx1100).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3635define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
3636; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
3637; GFX9:       ; %bb.0:
3638; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3639; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3640; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3641; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3642; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3643; GFX9-NEXT:    s_mov_b32 s33, s32
3644; GFX9-NEXT:    s_addk_i32 s32, 0x400
3645; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3646; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
3647; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40003
3648; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3649; GFX9-NEXT:    s_getpc_b64 s[34:35]
3650; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
3651; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
3652; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3653; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3654; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3655; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3656; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3657; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3658; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3659; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3660; GFX9-NEXT:    s_waitcnt vmcnt(0)
3661; GFX9-NEXT:    s_setpc_b64 s[30:31]
3662;
3663; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
3664; GFX10:       ; %bb.0:
3665; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3666; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3667; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3668; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3669; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3670; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3671; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3672; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
3673; GFX10-NEXT:    v_mov_b32_e32 v1, 0x40003
3674; GFX10-NEXT:    s_mov_b32 s33, s32
3675; GFX10-NEXT:    s_addk_i32 s32, 0x200
3676; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3677; GFX10-NEXT:    s_getpc_b64 s[34:35]
3678; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
3679; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
3680; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3681; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3682; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3683; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3684; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3685; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3686; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3687; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3688; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3689; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3690; GFX10-NEXT:    s_waitcnt vmcnt(0)
3691; GFX10-NEXT:    s_setpc_b64 s[30:31]
3692;
3693; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
3694; GFX11:       ; %bb.0:
3695; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3696; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3697; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3698; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3699; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3700; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3701; GFX11-NEXT:    v_mov_b32_e32 v0, 0x20001
3702; GFX11-NEXT:    v_mov_b32_e32 v1, 0x40003
3703; GFX11-NEXT:    s_mov_b32 s33, s32
3704; GFX11-NEXT:    s_add_i32 s32, s32, 16
3705; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3706; GFX11-NEXT:    s_getpc_b64 s[0:1]
3707; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
3708; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
3709; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3710; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3711; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3712; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3713; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3714; GFX11-NEXT:    s_add_i32 s32, s32, -16
3715; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3716; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3717; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3718; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3719; GFX11-NEXT:    s_waitcnt vmcnt(0)
3720; GFX11-NEXT:    s_setpc_b64 s[30:31]
3721;
3722; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
3723; GFX10-SCRATCH:       ; %bb.0:
3724; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3725; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3726; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3727; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3728; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3729; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3730; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3731; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x20001
3732; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0x40003
3733; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3734; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3735; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3736; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3737; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4
3738; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
3739; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3740; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3741; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3742; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3743; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3744; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3745; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3746; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3747; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3748; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3749; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3750; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3751  call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
3752  ret void
3753}
3754
; Loads a <2 x half> value through an undef addrspace(1) pointer and passes it
; as the only argument of an amdgpu_gfx call to external_void_func_v2f16.
; In the expected output of every configuration the value is loaded as a single
; dword into v0 ahead of the s_swappc_b64 call, and s30/s31/s33 are kept in
; lanes 0/1/2 of v40 across the call.
; The pointer is undef, so presumably only the argument lowering is of
; interest here and not the loaded value (reviewer assumption, please confirm).
; The assertions in this function are autogenerated (see the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3755define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
3756; GFX9-LABEL: test_call_external_void_func_v2f16:
3757; GFX9:       ; %bb.0:
3758; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3759; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3760; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3761; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3762; GFX9-NEXT:    global_load_dword v0, v[0:1], off
3763; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3764; GFX9-NEXT:    s_mov_b32 s33, s32
3765; GFX9-NEXT:    s_addk_i32 s32, 0x400
3766; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3767; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3768; GFX9-NEXT:    s_getpc_b64 s[34:35]
3769; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
3770; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
3771; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3772; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3773; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3774; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3775; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3776; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3777; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3778; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3779; GFX9-NEXT:    s_waitcnt vmcnt(0)
3780; GFX9-NEXT:    s_setpc_b64 s[30:31]
3781;
3782; GFX10-LABEL: test_call_external_void_func_v2f16:
3783; GFX10:       ; %bb.0:
3784; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3785; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3786; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3787; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3788; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3789; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3790; GFX10-NEXT:    global_load_dword v0, v[0:1], off
3791; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3792; GFX10-NEXT:    s_mov_b32 s33, s32
3793; GFX10-NEXT:    s_addk_i32 s32, 0x200
3794; GFX10-NEXT:    s_getpc_b64 s[34:35]
3795; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
3796; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
3797; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3798; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3799; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3800; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3801; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3802; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3803; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3804; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3805; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3806; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3807; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3808; GFX10-NEXT:    s_waitcnt vmcnt(0)
3809; GFX10-NEXT:    s_setpc_b64 s[30:31]
3810;
3811; GFX11-LABEL: test_call_external_void_func_v2f16:
3812; GFX11:       ; %bb.0:
3813; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3814; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3815; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3816; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3817; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3818; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
3819; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3820; GFX11-NEXT:    s_mov_b32 s33, s32
3821; GFX11-NEXT:    s_add_i32 s32, s32, 16
3822; GFX11-NEXT:    s_getpc_b64 s[0:1]
3823; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4
3824; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12
3825; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3826; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3827; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3828; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3829; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3830; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3831; GFX11-NEXT:    s_add_i32 s32, s32, -16
3832; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3833; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3834; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3835; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3836; GFX11-NEXT:    s_waitcnt vmcnt(0)
3837; GFX11-NEXT:    s_setpc_b64 s[30:31]
3838;
3839; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
3840; GFX10-SCRATCH:       ; %bb.0:
3841; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3842; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3843; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3844; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3845; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3846; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3847; GFX10-SCRATCH-NEXT:    global_load_dword v0, v[0:1], off
3848; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3849; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3850; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3851; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3852; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4
3853; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12
3854; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3855; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3856; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3857; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3858; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3859; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3860; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3861; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3862; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3863; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3864; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3865; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3866; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3867  %val = load <2 x half>, <2 x half> addrspace(1)* undef
3868  call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val)
3869  ret void
3870}
3871
; Loads a <2 x i32> value through an undef addrspace(1) pointer and passes it
; as the only argument of an amdgpu_gfx call to external_void_func_v2i32.
; In the expected output of every configuration the two elements are loaded
; with one 64-bit global load into v[0:1] ahead of the s_swappc_b64 call, and
; s30/s31/s33 are kept in lanes 0/1/2 of v40 across the call.
; The pointer is undef, so presumably only the argument lowering is of
; interest here and not the loaded value (reviewer assumption, please confirm).
; The assertions in this function are autogenerated (see the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3872define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
3873; GFX9-LABEL: test_call_external_void_func_v2i32:
3874; GFX9:       ; %bb.0:
3875; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3876; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3877; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3878; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3879; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3880; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3881; GFX9-NEXT:    s_mov_b32 s33, s32
3882; GFX9-NEXT:    s_addk_i32 s32, 0x400
3883; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
3884; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
3885; GFX9-NEXT:    s_getpc_b64 s[34:35]
3886; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
3887; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
3888; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3889; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
3890; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
3891; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
3892; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
3893; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3894; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3895; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3896; GFX9-NEXT:    s_waitcnt vmcnt(0)
3897; GFX9-NEXT:    s_setpc_b64 s[30:31]
3898;
3899; GFX10-LABEL: test_call_external_void_func_v2i32:
3900; GFX10:       ; %bb.0:
3901; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3902; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3903; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3904; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3905; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3906; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3907; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3908; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
3909; GFX10-NEXT:    s_mov_b32 s33, s32
3910; GFX10-NEXT:    s_addk_i32 s32, 0x200
3911; GFX10-NEXT:    s_getpc_b64 s[34:35]
3912; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
3913; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
3914; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
3915; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
3916; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
3917; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
3918; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
3919; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
3920; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
3921; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
3922; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
3923; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
3924; GFX10-NEXT:    s_mov_b32 exec_lo, s34
3925; GFX10-NEXT:    s_waitcnt vmcnt(0)
3926; GFX10-NEXT:    s_setpc_b64 s[30:31]
3927;
3928; GFX11-LABEL: test_call_external_void_func_v2i32:
3929; GFX11:       ; %bb.0:
3930; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3931; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3932; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3933; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
3934; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3935; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
3936; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
3937; GFX11-NEXT:    s_mov_b32 s33, s32
3938; GFX11-NEXT:    s_add_i32 s32, s32, 16
3939; GFX11-NEXT:    s_getpc_b64 s[0:1]
3940; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
3941; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
3942; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
3943; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
3944; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3945; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3946; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
3947; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
3948; GFX11-NEXT:    s_add_i32 s32, s32, -16
3949; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
3950; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
3951; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
3952; GFX11-NEXT:    s_mov_b32 exec_lo, s0
3953; GFX11-NEXT:    s_waitcnt vmcnt(0)
3954; GFX11-NEXT:    s_setpc_b64 s[30:31]
3955;
3956; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
3957; GFX10-SCRATCH:       ; %bb.0:
3958; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3959; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
3960; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3961; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
3962; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3963; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3964; GFX10-SCRATCH-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
3965; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
3966; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
3967; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
3968; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
3969; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
3970; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
3971; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
3972; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
3973; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
3974; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
3975; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
3976; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
3977; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
3978; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
3979; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
3980; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
3981; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
3982; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
3983; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
3984  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
3985  call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val)
3986  ret void
3987}
3988
; Passes the constant vector <2 x i32> <1, 2> as the only argument of an
; amdgpu_gfx call to external_void_func_v2i32.
; In the expected output of every configuration the constants are materialized
; directly into v0 (1) and v1 (2) ahead of the s_swappc_b64 call; the gfx1100
; output combines both moves into one v_dual_mov_b32. s30/s31/s33 are kept in
; lanes 0/1/2 of v40 across the call.
; The assertions in this function are autogenerated (see the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3989define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
3990; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
3991; GFX9:       ; %bb.0:
3992; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3993; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
3994; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
3995; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
3996; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
3997; GFX9-NEXT:    s_mov_b32 s33, s32
3998; GFX9-NEXT:    s_addk_i32 s32, 0x400
3999; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4000; GFX9-NEXT:    v_mov_b32_e32 v0, 1
4001; GFX9-NEXT:    v_mov_b32_e32 v1, 2
4002; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4003; GFX9-NEXT:    s_getpc_b64 s[34:35]
4004; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
4005; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
4006; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4007; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4008; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4009; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4010; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4011; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4012; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4013; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4014; GFX9-NEXT:    s_waitcnt vmcnt(0)
4015; GFX9-NEXT:    s_setpc_b64 s[30:31]
4016;
4017; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
4018; GFX10:       ; %bb.0:
4019; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4020; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4021; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4022; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4023; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4024; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4025; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4026; GFX10-NEXT:    v_mov_b32_e32 v0, 1
4027; GFX10-NEXT:    v_mov_b32_e32 v1, 2
4028; GFX10-NEXT:    s_mov_b32 s33, s32
4029; GFX10-NEXT:    s_addk_i32 s32, 0x200
4030; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4031; GFX10-NEXT:    s_getpc_b64 s[34:35]
4032; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
4033; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
4034; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4035; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4036; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4037; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4038; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4039; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4040; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4041; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4042; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4043; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4044; GFX10-NEXT:    s_waitcnt vmcnt(0)
4045; GFX10-NEXT:    s_setpc_b64 s[30:31]
4046;
4047; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
4048; GFX11:       ; %bb.0:
4049; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4050; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4051; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4052; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4053; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4054; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4055; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4056; GFX11-NEXT:    s_mov_b32 s33, s32
4057; GFX11-NEXT:    s_add_i32 s32, s32, 16
4058; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4059; GFX11-NEXT:    s_getpc_b64 s[0:1]
4060; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
4061; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
4062; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4063; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4064; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4065; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4066; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4067; GFX11-NEXT:    s_add_i32 s32, s32, -16
4068; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4069; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4070; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4071; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4072; GFX11-NEXT:    s_waitcnt vmcnt(0)
4073; GFX11-NEXT:    s_setpc_b64 s[30:31]
4074;
4075; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
4076; GFX10-SCRATCH:       ; %bb.0:
4077; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4078; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4079; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4080; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4081; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4082; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4083; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4084; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
4085; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2
4086; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4087; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4088; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4089; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4090; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4
4091; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
4092; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4093; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4094; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4095; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4096; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4097; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4098; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4099; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4100; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4101; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4102; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4103; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
4104  call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
4105  ret void
4106}
4107
; Passes the constant vector <3 x i32> <3, 4, 5> as the only argument of an
; amdgpu_gfx call to external_void_func_v3i32.
; The test function itself takes one unnamed i32 parameter that the body never
; reads; its purpose is not visible here (reviewer note: presumably it occupies
; the first argument register on entry, please confirm).
; In the expected output of every configuration the constants are materialized
; into v0 (3), v1 (4) and v2 (5) ahead of the s_swappc_b64 call, and
; s30/s31/s33 are kept in lanes 0/1/2 of v40 across the call.
; The assertions in this function are autogenerated (see the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4108define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
4109; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
4110; GFX9:       ; %bb.0:
4111; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4112; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4113; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4114; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4115; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4116; GFX9-NEXT:    s_mov_b32 s33, s32
4117; GFX9-NEXT:    s_addk_i32 s32, 0x400
4118; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4119; GFX9-NEXT:    v_mov_b32_e32 v0, 3
4120; GFX9-NEXT:    v_mov_b32_e32 v1, 4
4121; GFX9-NEXT:    v_mov_b32_e32 v2, 5
4122; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4123; GFX9-NEXT:    s_getpc_b64 s[34:35]
4124; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
4125; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
4126; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4127; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4128; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4129; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4130; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4131; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4132; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4133; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4134; GFX9-NEXT:    s_waitcnt vmcnt(0)
4135; GFX9-NEXT:    s_setpc_b64 s[30:31]
4136;
4137; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
4138; GFX10:       ; %bb.0:
4139; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4140; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4141; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4142; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4143; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4144; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4145; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4146; GFX10-NEXT:    v_mov_b32_e32 v0, 3
4147; GFX10-NEXT:    v_mov_b32_e32 v1, 4
4148; GFX10-NEXT:    v_mov_b32_e32 v2, 5
4149; GFX10-NEXT:    s_mov_b32 s33, s32
4150; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4151; GFX10-NEXT:    s_addk_i32 s32, 0x200
4152; GFX10-NEXT:    s_getpc_b64 s[34:35]
4153; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
4154; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
4155; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4156; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4157; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4158; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4159; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4160; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4161; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4162; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4163; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4164; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4165; GFX10-NEXT:    s_waitcnt vmcnt(0)
4166; GFX10-NEXT:    s_setpc_b64 s[30:31]
4167;
4168; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
4169; GFX11:       ; %bb.0:
4170; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4171; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4172; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4173; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4174; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4175; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4176; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
4177; GFX11-NEXT:    v_mov_b32_e32 v2, 5
4178; GFX11-NEXT:    s_mov_b32 s33, s32
4179; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4180; GFX11-NEXT:    s_add_i32 s32, s32, 16
4181; GFX11-NEXT:    s_getpc_b64 s[0:1]
4182; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4
4183; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
4184; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4185; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4186; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4187; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4188; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4189; GFX11-NEXT:    s_add_i32 s32, s32, -16
4190; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4191; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4192; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4193; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4194; GFX11-NEXT:    s_waitcnt vmcnt(0)
4195; GFX11-NEXT:    s_setpc_b64 s[30:31]
4196;
4197; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
4198; GFX10-SCRATCH:       ; %bb.0:
4199; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4200; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4201; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4202; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4203; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4204; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4205; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4206; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 3
4207; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 4
4208; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 5
4209; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4210; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4211; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4212; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4213; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4
4214; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
4215; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4216; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4217; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4218; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4219; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4220; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4221; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4222; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4223; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4224; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4225; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4226; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
4227  call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
4228  ret void
4229}
4230
; Passes the constant vector <3 x i32> <3, 4, 5> followed by the scalar i32 6
; in an amdgpu_gfx call to external_void_func_v3i32_i32.
; The test function itself takes one unnamed i32 parameter that the body never
; reads; its purpose is not visible here (reviewer note: presumably it occupies
; the first argument register on entry, please confirm).
; In the expected output of every configuration the vector elements are
; materialized into v0 (3), v1 (4) and v2 (5) and the trailing scalar into the
; next register, v3 (6), ahead of the s_swappc_b64 call. s30/s31/s33 are kept
; in lanes 0/1/2 of v40 across the call.
; The assertions in this function are autogenerated (see the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4231define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
4232; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
4233; GFX9:       ; %bb.0:
4234; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4236; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4237; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4238; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4239; GFX9-NEXT:    s_mov_b32 s33, s32
4240; GFX9-NEXT:    s_addk_i32 s32, 0x400
4241; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4242; GFX9-NEXT:    v_mov_b32_e32 v0, 3
4243; GFX9-NEXT:    v_mov_b32_e32 v1, 4
4244; GFX9-NEXT:    v_mov_b32_e32 v2, 5
4245; GFX9-NEXT:    v_mov_b32_e32 v3, 6
4246; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4247; GFX9-NEXT:    s_getpc_b64 s[34:35]
4248; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
4249; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
4250; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4251; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4252; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4253; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4254; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4255; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4256; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4257; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4258; GFX9-NEXT:    s_waitcnt vmcnt(0)
4259; GFX9-NEXT:    s_setpc_b64 s[30:31]
4260;
4261; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
4262; GFX10:       ; %bb.0:
4263; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4264; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4265; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4266; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4267; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4268; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4269; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4270; GFX10-NEXT:    v_mov_b32_e32 v0, 3
4271; GFX10-NEXT:    v_mov_b32_e32 v1, 4
4272; GFX10-NEXT:    v_mov_b32_e32 v2, 5
4273; GFX10-NEXT:    v_mov_b32_e32 v3, 6
4274; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4275; GFX10-NEXT:    s_mov_b32 s33, s32
4276; GFX10-NEXT:    s_addk_i32 s32, 0x200
4277; GFX10-NEXT:    s_getpc_b64 s[34:35]
4278; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
4279; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
4280; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4281; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4282; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4283; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4284; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4285; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4286; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4287; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4288; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4289; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4290; GFX10-NEXT:    s_waitcnt vmcnt(0)
4291; GFX10-NEXT:    s_setpc_b64 s[30:31]
4292;
4293; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
4294; GFX11:       ; %bb.0:
4295; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4296; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4297; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4298; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4299; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4300; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4301; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
4302; GFX11-NEXT:    v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
4303; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4304; GFX11-NEXT:    s_mov_b32 s33, s32
4305; GFX11-NEXT:    s_add_i32 s32, s32, 16
4306; GFX11-NEXT:    s_getpc_b64 s[0:1]
4307; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4
4308; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
4309; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4310; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4311; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4312; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4313; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4314; GFX11-NEXT:    s_add_i32 s32, s32, -16
4315; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4316; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4317; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4318; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4319; GFX11-NEXT:    s_waitcnt vmcnt(0)
4320; GFX11-NEXT:    s_setpc_b64 s[30:31]
4321;
4322; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
4323; GFX10-SCRATCH:       ; %bb.0:
4324; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4325; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4326; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4327; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4328; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4329; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4330; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4331; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 3
4332; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 4
4333; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 5
4334; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 6
4335; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4336; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4337; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4338; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4339; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4
4340; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
4341; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4342; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4343; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4344; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4345; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4346; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4347; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4348; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4349; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4350; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4351; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4352; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
4353  call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
4354  ret void
4355}
4356
; Calls external_void_func_v4i32 with a <4 x i32> argument that is loaded from
; an undef addrspace(1) pointer. The autogenerated checks below expect the
; value to be fetched by a single 128-bit global load into v[0:3] ahead of the
; s_swappc_b64 call, with s30/s31/s33 preserved in lanes 0-2 of v40.
4357define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
4358; GFX9-LABEL: test_call_external_void_func_v4i32:
4359; GFX9:       ; %bb.0:
4360; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4361; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4362; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4363; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4364; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4365; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4366; GFX9-NEXT:    s_mov_b32 s33, s32
4367; GFX9-NEXT:    s_addk_i32 s32, 0x400
4368; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4369; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4370; GFX9-NEXT:    s_getpc_b64 s[34:35]
4371; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
4372; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
4373; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4374; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4375; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4376; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4377; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4378; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4379; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4380; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4381; GFX9-NEXT:    s_waitcnt vmcnt(0)
4382; GFX9-NEXT:    s_setpc_b64 s[30:31]
4383;
4384; GFX10-LABEL: test_call_external_void_func_v4i32:
4385; GFX10:       ; %bb.0:
4386; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4387; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4388; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4389; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4390; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4391; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4392; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4393; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4394; GFX10-NEXT:    s_mov_b32 s33, s32
4395; GFX10-NEXT:    s_addk_i32 s32, 0x200
4396; GFX10-NEXT:    s_getpc_b64 s[34:35]
4397; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
4398; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
4399; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4400; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4401; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4402; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4403; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4404; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4405; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4406; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4407; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4408; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4409; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4410; GFX10-NEXT:    s_waitcnt vmcnt(0)
4411; GFX10-NEXT:    s_setpc_b64 s[30:31]
4412;
4413; GFX11-LABEL: test_call_external_void_func_v4i32:
4414; GFX11:       ; %bb.0:
4415; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4416; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4417; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4418; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4419; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4420; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4421; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4422; GFX11-NEXT:    s_mov_b32 s33, s32
4423; GFX11-NEXT:    s_add_i32 s32, s32, 16
4424; GFX11-NEXT:    s_getpc_b64 s[0:1]
4425; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
4426; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
4427; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4428; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4429; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4430; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4431; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4432; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4433; GFX11-NEXT:    s_add_i32 s32, s32, -16
4434; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4435; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4436; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4437; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4438; GFX11-NEXT:    s_waitcnt vmcnt(0)
4439; GFX11-NEXT:    s_setpc_b64 s[30:31]
4440;
4441; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
4442; GFX10-SCRATCH:       ; %bb.0:
4443; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4444; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4445; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4446; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4447; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4448; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4449; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4450; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4451; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4452; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4453; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4454; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
4455; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
4456; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4457; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4458; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4459; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4460; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4461; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4462; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4463; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4464; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4465; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4466; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4467; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4468; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The load address is undef; the loaded vector is the call's only argument.
4469  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
4470  call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
4471  ret void
4472}
4473
; Calls external_void_func_v4i32 with the constant vector <1, 2, 3, 4>. The
; autogenerated checks below expect the four elements to be materialized as
; immediates in v0-v3 before the s_swappc_b64 call (as two v_dual_mov_b32
; instructions on the gfx1100 run, four v_mov_b32_e32 on the other runs).
4474define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
4475; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
4476; GFX9:       ; %bb.0:
4477; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4478; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4479; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4480; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4481; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4482; GFX9-NEXT:    s_mov_b32 s33, s32
4483; GFX9-NEXT:    s_addk_i32 s32, 0x400
4484; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4485; GFX9-NEXT:    v_mov_b32_e32 v0, 1
4486; GFX9-NEXT:    v_mov_b32_e32 v1, 2
4487; GFX9-NEXT:    v_mov_b32_e32 v2, 3
4488; GFX9-NEXT:    v_mov_b32_e32 v3, 4
4489; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4490; GFX9-NEXT:    s_getpc_b64 s[34:35]
4491; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
4492; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
4493; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4494; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4495; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4496; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4497; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4498; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4499; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4500; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4501; GFX9-NEXT:    s_waitcnt vmcnt(0)
4502; GFX9-NEXT:    s_setpc_b64 s[30:31]
4503;
4504; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
4505; GFX10:       ; %bb.0:
4506; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4507; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4508; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4509; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4510; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4511; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4512; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4513; GFX10-NEXT:    v_mov_b32_e32 v0, 1
4514; GFX10-NEXT:    v_mov_b32_e32 v1, 2
4515; GFX10-NEXT:    v_mov_b32_e32 v2, 3
4516; GFX10-NEXT:    v_mov_b32_e32 v3, 4
4517; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4518; GFX10-NEXT:    s_mov_b32 s33, s32
4519; GFX10-NEXT:    s_addk_i32 s32, 0x200
4520; GFX10-NEXT:    s_getpc_b64 s[34:35]
4521; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4
4522; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12
4523; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4524; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4525; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4526; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4527; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4528; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4529; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4530; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4531; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4532; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4533; GFX10-NEXT:    s_waitcnt vmcnt(0)
4534; GFX10-NEXT:    s_setpc_b64 s[30:31]
4535;
4536; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
4537; GFX11:       ; %bb.0:
4538; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4539; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4540; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4541; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4542; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4543; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4544; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4545; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4546; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4547; GFX11-NEXT:    s_mov_b32 s33, s32
4548; GFX11-NEXT:    s_add_i32 s32, s32, 16
4549; GFX11-NEXT:    s_getpc_b64 s[0:1]
4550; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
4551; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
4552; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4553; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4554; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4555; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4556; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4557; GFX11-NEXT:    s_add_i32 s32, s32, -16
4558; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4559; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4560; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4561; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4562; GFX11-NEXT:    s_waitcnt vmcnt(0)
4563; GFX11-NEXT:    s_setpc_b64 s[30:31]
4564;
4565; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
4566; GFX10-SCRATCH:       ; %bb.0:
4567; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4568; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4569; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4570; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4571; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4572; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4573; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4574; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
4575; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2
4576; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 3
4577; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 4
4578; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4579; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4580; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4581; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4582; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4
4583; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
4584; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4585; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4586; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4587; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4588; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4589; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4590; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4591; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4592; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4593; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4594; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4595; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
4596  call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
4597  ret void
4598}
4599
; Calls external_void_func_v5i32 with the constant vector <1, 2, 3, 4, 5>. The
; autogenerated checks below expect the five elements to be materialized as
; immediates in v0-v4 before the s_swappc_b64 call (on the gfx1100 run: two
; v_dual_mov_b32 instructions for v0-v3 plus one v_mov_b32_e32 for v4).
4600define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
4601; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
4602; GFX9:       ; %bb.0:
4603; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4604; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4605; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4606; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4607; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4608; GFX9-NEXT:    s_mov_b32 s33, s32
4609; GFX9-NEXT:    s_addk_i32 s32, 0x400
4610; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4611; GFX9-NEXT:    v_mov_b32_e32 v0, 1
4612; GFX9-NEXT:    v_mov_b32_e32 v1, 2
4613; GFX9-NEXT:    v_mov_b32_e32 v2, 3
4614; GFX9-NEXT:    v_mov_b32_e32 v3, 4
4615; GFX9-NEXT:    v_mov_b32_e32 v4, 5
4616; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4617; GFX9-NEXT:    s_getpc_b64 s[34:35]
4618; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4
4619; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12
4620; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4621; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4622; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4623; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4624; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4625; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4626; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4627; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4628; GFX9-NEXT:    s_waitcnt vmcnt(0)
4629; GFX9-NEXT:    s_setpc_b64 s[30:31]
4630;
4631; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
4632; GFX10:       ; %bb.0:
4633; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4634; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4635; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4636; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4637; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4638; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4639; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4640; GFX10-NEXT:    v_mov_b32_e32 v0, 1
4641; GFX10-NEXT:    v_mov_b32_e32 v1, 2
4642; GFX10-NEXT:    v_mov_b32_e32 v2, 3
4643; GFX10-NEXT:    v_mov_b32_e32 v3, 4
4644; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4645; GFX10-NEXT:    v_mov_b32_e32 v4, 5
4646; GFX10-NEXT:    s_mov_b32 s33, s32
4647; GFX10-NEXT:    s_addk_i32 s32, 0x200
4648; GFX10-NEXT:    s_getpc_b64 s[34:35]
4649; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4
4650; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12
4651; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4652; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4653; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4654; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4655; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4656; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4657; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4658; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4659; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4660; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4661; GFX10-NEXT:    s_waitcnt vmcnt(0)
4662; GFX10-NEXT:    s_setpc_b64 s[30:31]
4663;
4664; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
4665; GFX11:       ; %bb.0:
4666; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4667; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4668; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4669; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4670; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4671; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4672; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4673; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4674; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4675; GFX11-NEXT:    v_mov_b32_e32 v4, 5
4676; GFX11-NEXT:    s_mov_b32 s33, s32
4677; GFX11-NEXT:    s_add_i32 s32, s32, 16
4678; GFX11-NEXT:    s_getpc_b64 s[0:1]
4679; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4
4680; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
4681; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4682; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4683; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4684; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4685; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4686; GFX11-NEXT:    s_add_i32 s32, s32, -16
4687; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4688; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4689; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4690; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4691; GFX11-NEXT:    s_waitcnt vmcnt(0)
4692; GFX11-NEXT:    s_setpc_b64 s[30:31]
4693;
4694; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
4695; GFX10-SCRATCH:       ; %bb.0:
4696; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4697; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4698; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4699; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4700; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4701; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4702; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4703; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
4704; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2
4705; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 3
4706; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 4
4707; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4708; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 5
4709; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4710; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4711; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4712; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4
4713; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
4714; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4715; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4716; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4717; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4718; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4719; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4720; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4721; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4722; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4723; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4724; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4725; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
4726  call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
4727  ret void
4728}
4729
; Calls external_void_func_v8i32 with an <8 x i32> argument obtained through two
; dependent loads: a pointer is read from an undef addrspace(4) location, then
; the vector is read through that addrspace(1) pointer. The autogenerated
; checks below expect a 64-bit scalar load for the pointer followed by two
; 128-bit global loads (offsets 0 and 16) into v[0:3] and v[4:7] ahead of the
; s_swappc_b64 call.
4730define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
4731; GFX9-LABEL: test_call_external_void_func_v8i32:
4732; GFX9:       ; %bb.0:
4733; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4734; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4735; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4736; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4737; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
4738; GFX9-NEXT:    v_mov_b32_e32 v8, 0
4739; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4740; GFX9-NEXT:    s_mov_b32 s33, s32
4741; GFX9-NEXT:    s_addk_i32 s32, 0x400
4742; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4743; GFX9-NEXT:    global_load_dwordx4 v[0:3], v8, s[34:35]
4744; GFX9-NEXT:    global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
4745; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4746; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4747; GFX9-NEXT:    s_getpc_b64 s[34:35]
4748; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
4749; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
4750; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4751; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4752; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4753; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4754; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4755; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4756; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4757; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4758; GFX9-NEXT:    s_waitcnt vmcnt(0)
4759; GFX9-NEXT:    s_setpc_b64 s[30:31]
4760;
4761; GFX10-LABEL: test_call_external_void_func_v8i32:
4762; GFX10:       ; %bb.0:
4763; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4764; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4765; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4766; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4767; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4768; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4769; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
4770; GFX10-NEXT:    v_mov_b32_e32 v8, 0
4771; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4772; GFX10-NEXT:    s_mov_b32 s33, s32
4773; GFX10-NEXT:    s_addk_i32 s32, 0x200
4774; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4775; GFX10-NEXT:    s_clause 0x1
4776; GFX10-NEXT:    global_load_dwordx4 v[0:3], v8, s[34:35]
4777; GFX10-NEXT:    global_load_dwordx4 v[4:7], v8, s[34:35] offset:16
4778; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4779; GFX10-NEXT:    s_getpc_b64 s[34:35]
4780; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
4781; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
4782; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4783; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4784; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4785; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4786; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4787; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4788; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4789; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4790; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4791; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4792; GFX10-NEXT:    s_waitcnt vmcnt(0)
4793; GFX10-NEXT:    s_setpc_b64 s[30:31]
4794;
4795; GFX11-LABEL: test_call_external_void_func_v8i32:
4796; GFX11:       ; %bb.0:
4797; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4798; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4799; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4800; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4801; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4802; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4803; GFX11-NEXT:    v_mov_b32_e32 v4, 0
4804; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4805; GFX11-NEXT:    s_mov_b32 s33, s32
4806; GFX11-NEXT:    s_add_i32 s32, s32, 16
4807; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4808; GFX11-NEXT:    s_clause 0x1
4809; GFX11-NEXT:    global_load_b128 v[0:3], v4, s[0:1]
4810; GFX11-NEXT:    global_load_b128 v[4:7], v4, s[0:1] offset:16
4811; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4812; GFX11-NEXT:    s_getpc_b64 s[0:1]
4813; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
4814; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
4815; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4816; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4817; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4818; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4819; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4820; GFX11-NEXT:    s_add_i32 s32, s32, -16
4821; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4822; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4823; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4824; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4825; GFX11-NEXT:    s_waitcnt vmcnt(0)
4826; GFX11-NEXT:    s_setpc_b64 s[30:31]
4827;
4828; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
4829; GFX10-SCRATCH:       ; %bb.0:
4830; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4831; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4832; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4833; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4834; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4835; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4836; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4837; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v8, 0
4838; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4839; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4840; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4841; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
4842; GFX10-SCRATCH-NEXT:    s_clause 0x1
4843; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v8, s[0:1]
4844; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
4845; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4846; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4847; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
4848; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
4849; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4850; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4851; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4852; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4853; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4854; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4855; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4856; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4857; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4858; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4859; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
4860; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; First load fetches the addrspace(1) pointer from an undef addrspace(4)
  ; address; the second load reads the vector that is passed to the callee.
4861  %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
4862  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
4863  call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
4864  ret void
4865}
4866
; Calls external_void_func_v8i32 with the constant vector <1, 2, ..., 8>. The
; autogenerated checks below expect the eight elements to be materialized as
; immediates in v0-v7 before the s_swappc_b64 call (as four v_dual_mov_b32
; instructions on the gfx1100 run, eight v_mov_b32_e32 on the other runs).
4867define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
4868; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
4869; GFX9:       ; %bb.0:
4870; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4871; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4872; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4873; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4874; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
4875; GFX9-NEXT:    s_mov_b32 s33, s32
4876; GFX9-NEXT:    s_addk_i32 s32, 0x400
4877; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
4878; GFX9-NEXT:    v_mov_b32_e32 v0, 1
4879; GFX9-NEXT:    v_mov_b32_e32 v1, 2
4880; GFX9-NEXT:    v_mov_b32_e32 v2, 3
4881; GFX9-NEXT:    v_mov_b32_e32 v3, 4
4882; GFX9-NEXT:    v_mov_b32_e32 v4, 5
4883; GFX9-NEXT:    v_mov_b32_e32 v5, 6
4884; GFX9-NEXT:    v_mov_b32_e32 v6, 7
4885; GFX9-NEXT:    v_mov_b32_e32 v7, 8
4886; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
4887; GFX9-NEXT:    s_getpc_b64 s[34:35]
4888; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
4889; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
4890; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4891; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
4892; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
4893; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
4894; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
4895; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
4896; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4897; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
4898; GFX9-NEXT:    s_waitcnt vmcnt(0)
4899; GFX9-NEXT:    s_setpc_b64 s[30:31]
4900;
4901; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
4902; GFX10:       ; %bb.0:
4903; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4904; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4905; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4906; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
4907; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4908; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4909; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
4910; GFX10-NEXT:    v_mov_b32_e32 v0, 1
4911; GFX10-NEXT:    v_mov_b32_e32 v1, 2
4912; GFX10-NEXT:    v_mov_b32_e32 v2, 3
4913; GFX10-NEXT:    v_mov_b32_e32 v3, 4
4914; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
4915; GFX10-NEXT:    v_mov_b32_e32 v4, 5
4916; GFX10-NEXT:    v_mov_b32_e32 v5, 6
4917; GFX10-NEXT:    v_mov_b32_e32 v6, 7
4918; GFX10-NEXT:    v_mov_b32_e32 v7, 8
4919; GFX10-NEXT:    s_mov_b32 s33, s32
4920; GFX10-NEXT:    s_addk_i32 s32, 0x200
4921; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
4922; GFX10-NEXT:    s_getpc_b64 s[34:35]
4923; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4
4924; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12
4925; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
4926; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
4927; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
4928; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
4929; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
4930; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
4931; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
4932; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
4933; GFX10-NEXT:    s_mov_b32 exec_lo, s34
4934; GFX10-NEXT:    s_waitcnt vmcnt(0)
4935; GFX10-NEXT:    s_setpc_b64 s[30:31]
4936;
4937; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
4938; GFX11:       ; %bb.0:
4939; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4940; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4941; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4942; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
4943; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4944; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
4945; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4946; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4947; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
4948; GFX11-NEXT:    v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
4949; GFX11-NEXT:    v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
4950; GFX11-NEXT:    s_mov_b32 s33, s32
4951; GFX11-NEXT:    s_add_i32 s32, s32, 16
4952; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
4953; GFX11-NEXT:    s_getpc_b64 s[0:1]
4954; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
4955; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
4956; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4957; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4958; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
4959; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
4960; GFX11-NEXT:    s_add_i32 s32, s32, -16
4961; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
4962; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
4963; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
4964; GFX11-NEXT:    s_mov_b32 exec_lo, s0
4965; GFX11-NEXT:    s_waitcnt vmcnt(0)
4966; GFX11-NEXT:    s_setpc_b64 s[30:31]
4967;
4968; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
4969; GFX10-SCRATCH:       ; %bb.0:
4970; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4971; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
4972; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4973; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
4974; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
4975; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
4976; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
4977; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
4978; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 2
4979; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 3
4980; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 4
4981; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
4982; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 5
4983; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 6
4984; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 7
4985; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 8
4986; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
4987; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
4988; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
4989; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
4990; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
4991; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
4992; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
4993; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
4994; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
4995; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
4996; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
4997; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
4998; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
4999; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5000; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5001; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5002; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
5003  call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
5004  ret void
5005}
5006
; Calls external_void_func_v16i32 with a <16 x i32> argument. The vector's
; address is loaded from an undef addrspace(4) pointer and the vector itself is
; loaded through the resulting addrspace(1) pointer. The autogenerated checks
; below show the value being fetched by four 128-bit global loads directly into
; v0-v15 ahead of s_swappc_b64, with no argument data stored to memory; the only
; memory traffic besides those loads is the spill/reload of v40, whose lanes
; hold the saved s30, s31 and s33 across the call.
5007define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
5008; GFX9-LABEL: test_call_external_void_func_v16i32:
5009; GFX9:       ; %bb.0:
5010; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5011; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5012; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5013; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5014; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5015; GFX9-NEXT:    v_mov_b32_e32 v16, 0
5016; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5017; GFX9-NEXT:    s_mov_b32 s33, s32
5018; GFX9-NEXT:    s_addk_i32 s32, 0x400
5019; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5020; GFX9-NEXT:    global_load_dwordx4 v[0:3], v16, s[34:35]
5021; GFX9-NEXT:    global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
5022; GFX9-NEXT:    global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
5023; GFX9-NEXT:    global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
5024; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5025; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5026; GFX9-NEXT:    s_getpc_b64 s[34:35]
5027; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4
5028; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12
5029; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5030; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5031; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5032; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5033; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5034; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5035; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5036; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5037; GFX9-NEXT:    s_waitcnt vmcnt(0)
5038; GFX9-NEXT:    s_setpc_b64 s[30:31]
5039;
5040; GFX10-LABEL: test_call_external_void_func_v16i32:
5041; GFX10:       ; %bb.0:
5042; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5043; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5044; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5045; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5046; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5047; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5048; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5049; GFX10-NEXT:    v_mov_b32_e32 v16, 0
5050; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5051; GFX10-NEXT:    s_mov_b32 s33, s32
5052; GFX10-NEXT:    s_addk_i32 s32, 0x200
5053; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
5054; GFX10-NEXT:    s_clause 0x3
5055; GFX10-NEXT:    global_load_dwordx4 v[0:3], v16, s[34:35]
5056; GFX10-NEXT:    global_load_dwordx4 v[4:7], v16, s[34:35] offset:16
5057; GFX10-NEXT:    global_load_dwordx4 v[8:11], v16, s[34:35] offset:32
5058; GFX10-NEXT:    global_load_dwordx4 v[12:15], v16, s[34:35] offset:48
5059; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5060; GFX10-NEXT:    s_getpc_b64 s[34:35]
5061; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4
5062; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12
5063; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5064; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5065; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5066; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5067; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5068; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5069; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5070; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5071; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5072; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5073; GFX10-NEXT:    s_waitcnt vmcnt(0)
5074; GFX10-NEXT:    s_setpc_b64 s[30:31]
5075;
5076; GFX11-LABEL: test_call_external_void_func_v16i32:
5077; GFX11:       ; %bb.0:
5078; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5079; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5080; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5081; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
5082; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5083; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5084; GFX11-NEXT:    v_mov_b32_e32 v12, 0
5085; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5086; GFX11-NEXT:    s_mov_b32 s33, s32
5087; GFX11-NEXT:    s_add_i32 s32, s32, 16
5088; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5089; GFX11-NEXT:    s_clause 0x3
5090; GFX11-NEXT:    global_load_b128 v[0:3], v12, s[0:1]
5091; GFX11-NEXT:    global_load_b128 v[4:7], v12, s[0:1] offset:16
5092; GFX11-NEXT:    global_load_b128 v[8:11], v12, s[0:1] offset:32
5093; GFX11-NEXT:    global_load_b128 v[12:15], v12, s[0:1] offset:48
5094; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5095; GFX11-NEXT:    s_getpc_b64 s[0:1]
5096; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4
5097; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
5098; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5099; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5100; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5101; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5102; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5103; GFX11-NEXT:    s_add_i32 s32, s32, -16
5104; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5105; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5106; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
5107; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5108; GFX11-NEXT:    s_waitcnt vmcnt(0)
5109; GFX11-NEXT:    s_setpc_b64 s[30:31]
5110;
5111; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
5112; GFX10-SCRATCH:       ; %bb.0:
5113; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5114; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5115; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5116; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5117; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5118; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5119; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5120; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v16, 0
5121; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5122; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5123; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5124; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
5125; GFX10-SCRATCH-NEXT:    s_clause 0x3
5126; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v16, s[0:1]
5127; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[4:7], v16, s[0:1] offset:16
5128; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[8:11], v16, s[0:1] offset:32
5129; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[12:15], v16, s[0:1] offset:48
5130; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5131; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5132; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4
5133; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
5134; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5135; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5136; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5137; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5138; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5139; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5140; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5141; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5142; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5143; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5144; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5145; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
5146  %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
5147  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
5148  call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
5149  ret void
5150}
5151
; Calls external_void_func_v32i32 with a <32 x i32> argument loaded through an
; addrspace(1) pointer (the pointer itself is loaded from an undef addrspace(4)
; pointer). The autogenerated checks below show eight 128-bit global loads
; filling v0-v31 before s_swappc_b64 and no store of argument data to memory;
; v40 is spilled/reloaded around the body and its lanes hold the saved s30, s31
; and s33 across the call.
5152define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
5153; GFX9-LABEL: test_call_external_void_func_v32i32:
5154; GFX9:       ; %bb.0:
5155; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5156; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5157; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5158; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5159; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5160; GFX9-NEXT:    v_mov_b32_e32 v28, 0
5161; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5162; GFX9-NEXT:    s_mov_b32 s33, s32
5163; GFX9-NEXT:    s_addk_i32 s32, 0x400
5164; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5165; GFX9-NEXT:    global_load_dwordx4 v[0:3], v28, s[34:35]
5166; GFX9-NEXT:    global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
5167; GFX9-NEXT:    global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
5168; GFX9-NEXT:    global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
5169; GFX9-NEXT:    global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
5170; GFX9-NEXT:    global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
5171; GFX9-NEXT:    global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
5172; GFX9-NEXT:    s_nop 0
5173; GFX9-NEXT:    global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
5174; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5175; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5176; GFX9-NEXT:    s_getpc_b64 s[34:35]
5177; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4
5178; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12
5179; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5180; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5181; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5182; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5183; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5184; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5185; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5186; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5187; GFX9-NEXT:    s_waitcnt vmcnt(0)
5188; GFX9-NEXT:    s_setpc_b64 s[30:31]
5189;
5190; GFX10-LABEL: test_call_external_void_func_v32i32:
5191; GFX10:       ; %bb.0:
5192; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5193; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5194; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5195; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5196; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5197; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5198; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5199; GFX10-NEXT:    v_mov_b32_e32 v32, 0
5200; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5201; GFX10-NEXT:    s_mov_b32 s33, s32
5202; GFX10-NEXT:    s_addk_i32 s32, 0x200
5203; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
5204; GFX10-NEXT:    s_clause 0x7
5205; GFX10-NEXT:    global_load_dwordx4 v[0:3], v32, s[34:35]
5206; GFX10-NEXT:    global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
5207; GFX10-NEXT:    global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
5208; GFX10-NEXT:    global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
5209; GFX10-NEXT:    global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
5210; GFX10-NEXT:    global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
5211; GFX10-NEXT:    global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
5212; GFX10-NEXT:    global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
5213; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5214; GFX10-NEXT:    s_getpc_b64 s[34:35]
5215; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4
5216; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12
5217; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5218; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5219; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5220; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5221; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5222; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5223; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5224; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5225; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5226; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5227; GFX10-NEXT:    s_waitcnt vmcnt(0)
5228; GFX10-NEXT:    s_setpc_b64 s[30:31]
5229;
5230; GFX11-LABEL: test_call_external_void_func_v32i32:
5231; GFX11:       ; %bb.0:
5232; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5233; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5234; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5235; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
5236; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5237; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5238; GFX11-NEXT:    v_mov_b32_e32 v28, 0
5239; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5240; GFX11-NEXT:    s_mov_b32 s33, s32
5241; GFX11-NEXT:    s_add_i32 s32, s32, 16
5242; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5243; GFX11-NEXT:    s_clause 0x7
5244; GFX11-NEXT:    global_load_b128 v[0:3], v28, s[0:1]
5245; GFX11-NEXT:    global_load_b128 v[4:7], v28, s[0:1] offset:16
5246; GFX11-NEXT:    global_load_b128 v[8:11], v28, s[0:1] offset:32
5247; GFX11-NEXT:    global_load_b128 v[12:15], v28, s[0:1] offset:48
5248; GFX11-NEXT:    global_load_b128 v[16:19], v28, s[0:1] offset:64
5249; GFX11-NEXT:    global_load_b128 v[20:23], v28, s[0:1] offset:80
5250; GFX11-NEXT:    global_load_b128 v[24:27], v28, s[0:1] offset:96
5251; GFX11-NEXT:    global_load_b128 v[28:31], v28, s[0:1] offset:112
5252; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5253; GFX11-NEXT:    s_getpc_b64 s[0:1]
5254; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4
5255; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
5256; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5257; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5258; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5259; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5260; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5261; GFX11-NEXT:    s_add_i32 s32, s32, -16
5262; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5263; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5264; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
5265; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5266; GFX11-NEXT:    s_waitcnt vmcnt(0)
5267; GFX11-NEXT:    s_setpc_b64 s[30:31]
5268;
5269; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
5270; GFX10-SCRATCH:       ; %bb.0:
5271; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5272; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5273; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5274; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5275; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5276; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5277; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5278; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v32, 0
5279; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5280; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5281; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5282; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
5283; GFX10-SCRATCH-NEXT:    s_clause 0x7
5284; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v32, s[0:1]
5285; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
5286; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
5287; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
5288; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
5289; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
5290; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
5291; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
5292; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5293; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5294; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4
5295; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
5296; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5297; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5298; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5299; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5300; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5301; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5302; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5303; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5304; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5305; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5306; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5307; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
5308  %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
5309  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
5310  call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
5311  ret void
5312}
5313
; Calls external_void_func_v32i32_i32 with a <32 x i32> followed by an i32.
; This function's own unnamed i32 parameter is not referenced by the body; both
; call operands are loaded through undef pointers. The autogenerated checks
; below show the vector being loaded into v0-v31, while the trailing i32 is
; loaded into a separate VGPR (v32 or v33 depending on the run line) and stored
; to memory at s32 (buffer_store_dword / scratch_store) immediately before
; s_swappc_b64, i.e. the 33rd dword is passed through memory rather than in a
; VGPR. The s_waitcnt vmcnt(8) ahead of that store waits only for the i32 load,
; which is issued before the eight vector loads.
5314define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
5315; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
5316; GFX9:       ; %bb.0:
5317; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5318; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5319; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5320; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5321; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5322; GFX9-NEXT:    v_mov_b32_e32 v28, 0
5323; GFX9-NEXT:    global_load_dword v32, v[0:1], off
5324; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5325; GFX9-NEXT:    s_mov_b32 s33, s32
5326; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5327; GFX9-NEXT:    global_load_dwordx4 v[0:3], v28, s[34:35]
5328; GFX9-NEXT:    global_load_dwordx4 v[4:7], v28, s[34:35] offset:16
5329; GFX9-NEXT:    global_load_dwordx4 v[8:11], v28, s[34:35] offset:32
5330; GFX9-NEXT:    global_load_dwordx4 v[12:15], v28, s[34:35] offset:48
5331; GFX9-NEXT:    global_load_dwordx4 v[16:19], v28, s[34:35] offset:64
5332; GFX9-NEXT:    global_load_dwordx4 v[20:23], v28, s[34:35] offset:80
5333; GFX9-NEXT:    global_load_dwordx4 v[24:27], v28, s[34:35] offset:96
5334; GFX9-NEXT:    s_nop 0
5335; GFX9-NEXT:    global_load_dwordx4 v[28:31], v28, s[34:35] offset:112
5336; GFX9-NEXT:    s_addk_i32 s32, 0x400
5337; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5338; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5339; GFX9-NEXT:    s_getpc_b64 s[34:35]
5340; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4
5341; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12
5342; GFX9-NEXT:    s_waitcnt vmcnt(8)
5343; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32
5344; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5345; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5346; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5347; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5348; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5349; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5350; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5351; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5352; GFX9-NEXT:    s_waitcnt vmcnt(0)
5353; GFX9-NEXT:    s_setpc_b64 s[30:31]
5354;
5355; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
5356; GFX10:       ; %bb.0:
5357; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5358; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5359; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5360; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5361; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5362; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5363; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5364; GFX10-NEXT:    v_mov_b32_e32 v32, 0
5365; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5366; GFX10-NEXT:    s_mov_b32 s33, s32
5367; GFX10-NEXT:    s_addk_i32 s32, 0x200
5368; GFX10-NEXT:    global_load_dword v33, v[0:1], off
5369; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
5370; GFX10-NEXT:    s_clause 0x7
5371; GFX10-NEXT:    global_load_dwordx4 v[0:3], v32, s[34:35]
5372; GFX10-NEXT:    global_load_dwordx4 v[4:7], v32, s[34:35] offset:16
5373; GFX10-NEXT:    global_load_dwordx4 v[8:11], v32, s[34:35] offset:32
5374; GFX10-NEXT:    global_load_dwordx4 v[12:15], v32, s[34:35] offset:48
5375; GFX10-NEXT:    global_load_dwordx4 v[16:19], v32, s[34:35] offset:64
5376; GFX10-NEXT:    global_load_dwordx4 v[20:23], v32, s[34:35] offset:80
5377; GFX10-NEXT:    global_load_dwordx4 v[24:27], v32, s[34:35] offset:96
5378; GFX10-NEXT:    global_load_dwordx4 v[28:31], v32, s[34:35] offset:112
5379; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5380; GFX10-NEXT:    s_getpc_b64 s[34:35]
5381; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4
5382; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12
5383; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5384; GFX10-NEXT:    s_waitcnt vmcnt(8)
5385; GFX10-NEXT:    buffer_store_dword v33, off, s[0:3], s32
5386; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5387; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5388; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5389; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5390; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5391; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5392; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5393; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5394; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5395; GFX10-NEXT:    s_waitcnt vmcnt(0)
5396; GFX10-NEXT:    s_setpc_b64 s[30:31]
5397;
5398; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
5399; GFX11:       ; %bb.0:
5400; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5401; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5402; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5403; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
5404; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5405; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5406; GFX11-NEXT:    v_mov_b32_e32 v28, 0
5407; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5408; GFX11-NEXT:    s_mov_b32 s33, s32
5409; GFX11-NEXT:    s_add_i32 s32, s32, 16
5410; GFX11-NEXT:    global_load_b32 v32, v[0:1], off
5411; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5412; GFX11-NEXT:    s_clause 0x7
5413; GFX11-NEXT:    global_load_b128 v[0:3], v28, s[0:1]
5414; GFX11-NEXT:    global_load_b128 v[4:7], v28, s[0:1] offset:16
5415; GFX11-NEXT:    global_load_b128 v[8:11], v28, s[0:1] offset:32
5416; GFX11-NEXT:    global_load_b128 v[12:15], v28, s[0:1] offset:48
5417; GFX11-NEXT:    global_load_b128 v[16:19], v28, s[0:1] offset:64
5418; GFX11-NEXT:    global_load_b128 v[20:23], v28, s[0:1] offset:80
5419; GFX11-NEXT:    global_load_b128 v[24:27], v28, s[0:1] offset:96
5420; GFX11-NEXT:    global_load_b128 v[28:31], v28, s[0:1] offset:112
5421; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5422; GFX11-NEXT:    s_getpc_b64 s[0:1]
5423; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4
5424; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12
5425; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5426; GFX11-NEXT:    s_waitcnt vmcnt(8)
5427; GFX11-NEXT:    scratch_store_b32 off, v32, s32
5428; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5429; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5430; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5431; GFX11-NEXT:    s_add_i32 s32, s32, -16
5432; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5433; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5434; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
5435; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5436; GFX11-NEXT:    s_waitcnt vmcnt(0)
5437; GFX11-NEXT:    s_setpc_b64 s[30:31]
5438;
5439; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
5440; GFX10-SCRATCH:       ; %bb.0:
5441; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5442; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5443; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5444; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5445; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5446; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5447; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5448; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v32, 0
5449; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5450; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5451; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5452; GFX10-SCRATCH-NEXT:    global_load_dword v33, v[0:1], off
5453; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
5454; GFX10-SCRATCH-NEXT:    s_clause 0x7
5455; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v32, s[0:1]
5456; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[4:7], v32, s[0:1] offset:16
5457; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[8:11], v32, s[0:1] offset:32
5458; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[12:15], v32, s[0:1] offset:48
5459; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[16:19], v32, s[0:1] offset:64
5460; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
5461; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[24:27], v32, s[0:1] offset:96
5462; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[28:31], v32, s[0:1] offset:112
5463; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5464; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5465; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4
5466; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12
5467; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5468; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(8)
5469; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v33, s32
5470; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5471; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5472; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5473; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5474; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5475; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5476; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5477; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5478; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5479; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5480; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
5481  %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
5482  %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
5483  %val1 = load i32, i32 addrspace(1)* undef
5484  call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
5485  ret void
5486}
5487
5488define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
5489; GFX9-LABEL: test_call_external_i32_func_i32_imm:
5490; GFX9:       ; %bb.0:
5491; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5492; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5493; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
5494; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5495; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5496; GFX9-NEXT:    s_mov_b32 s33, s32
5497; GFX9-NEXT:    s_addk_i32 s32, 0x400
5498; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5499; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
5500; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5501; GFX9-NEXT:    v_mov_b32_e32 v41, v0
5502; GFX9-NEXT:    v_mov_b32_e32 v0, 42
5503; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5504; GFX9-NEXT:    v_mov_b32_e32 v42, v1
5505; GFX9-NEXT:    s_getpc_b64 s[34:35]
5506; GFX9-NEXT:    s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4
5507; GFX9-NEXT:    s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12
5508; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5509; GFX9-NEXT:    global_store_dword v[41:42], v0, off
5510; GFX9-NEXT:    s_waitcnt vmcnt(0)
5511; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
5512; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
5513; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5514; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5515; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5516; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5517; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5518; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
5519; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5520; GFX9-NEXT:    s_waitcnt vmcnt(0)
5521; GFX9-NEXT:    s_setpc_b64 s[30:31]
5522;
5523; GFX10-LABEL: test_call_external_i32_func_i32_imm:
5524; GFX10:       ; %bb.0:
5525; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5526; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5527; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5528; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
5529; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5530; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5531; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5532; GFX10-NEXT:    s_mov_b32 s33, s32
5533; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
5534; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
5535; GFX10-NEXT:    v_mov_b32_e32 v41, v0
5536; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5537; GFX10-NEXT:    v_mov_b32_e32 v0, 42
5538; GFX10-NEXT:    s_addk_i32 s32, 0x200
5539; GFX10-NEXT:    v_mov_b32_e32 v42, v1
5540; GFX10-NEXT:    s_getpc_b64 s[34:35]
5541; GFX10-NEXT:    s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4
5542; GFX10-NEXT:    s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12
5543; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5544; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5545; GFX10-NEXT:    global_store_dword v[41:42], v0, off
5546; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5547; GFX10-NEXT:    s_clause 0x1
5548; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33
5549; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4
5550; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5551; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5552; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5553; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5554; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5555; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
5556; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5557; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5558; GFX10-NEXT:    s_waitcnt vmcnt(0)
5559; GFX10-NEXT:    s_setpc_b64 s[30:31]
5560;
5561; GFX11-LABEL: test_call_external_i32_func_i32_imm:
5562; GFX11:       ; %bb.0:
5563; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5564; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5565; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5566; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill
5567; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5568; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5569; GFX11-NEXT:    s_mov_b32 s33, s32
5570; GFX11-NEXT:    s_clause 0x1
5571; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:4
5572; GFX11-NEXT:    scratch_store_b32 off, v42, s33
5573; GFX11-NEXT:    v_dual_mov_b32 v42, v1 :: v_dual_mov_b32 v41, v0
5574; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5575; GFX11-NEXT:    v_mov_b32_e32 v0, 42
5576; GFX11-NEXT:    s_add_i32 s32, s32, 16
5577; GFX11-NEXT:    s_getpc_b64 s[0:1]
5578; GFX11-NEXT:    s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4
5579; GFX11-NEXT:    s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12
5580; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5581; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5582; GFX11-NEXT:    global_store_b32 v[41:42], v0, off dlc
5583; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5584; GFX11-NEXT:    s_clause 0x1
5585; GFX11-NEXT:    scratch_load_b32 v42, off, s33
5586; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:4
5587; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5588; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5589; GFX11-NEXT:    s_add_i32 s32, s32, -16
5590; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5591; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5592; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload
5593; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5594; GFX11-NEXT:    s_waitcnt vmcnt(0)
5595; GFX11-NEXT:    s_setpc_b64 s[30:31]
5596;
5597; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
5598; GFX10-SCRATCH:       ; %bb.0:
5599; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5600; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5601; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5602; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
5603; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5604; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5605; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5606; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5607; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
5608; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v42, s33 ; 4-byte Folded Spill
5609; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v41, v0
5610; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5611; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 42
5612; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5613; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v42, v1
5614; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5615; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4
5616; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12
5617; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5618; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5619; GFX10-SCRATCH-NEXT:    global_store_dword v[41:42], v0, off
5620; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5621; GFX10-SCRATCH-NEXT:    s_clause 0x1
5622; GFX10-SCRATCH-NEXT:    scratch_load_dword v42, off, s33
5623; GFX10-SCRATCH-NEXT:    scratch_load_dword v41, off, s33 offset:4
5624; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5625; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5626; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5627; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5628; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5629; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
5630; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5631; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5632; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5633; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
5634  %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
5635  store volatile i32 %val, i32 addrspace(1)* %out
5636  ret void
5637}
5638
; Passes a { i8, i32 } struct by value to an amdgpu_gfx callee.
; In the expected output of every RUN configuration the i8 field is loaded as
; an unsigned byte into v0 and the i32 field (byte offset 4) into v1 ahead of
; the s_swappc_b64, i.e. the two fields are handed over in v0 and v1.
; NOTE(review): the callee declaration is not visible in this part of the
; file - presumably it is declared alongside the other external_void_func_*.
5639define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
5640; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
5641; GFX9:       ; %bb.0:
5642; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5643; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5644; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5645; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5646; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5647; GFX9-NEXT:    v_mov_b32_e32 v2, 0
5648; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5649; GFX9-NEXT:    s_mov_b32 s33, s32
5650; GFX9-NEXT:    s_addk_i32 s32, 0x400
5651; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5652; GFX9-NEXT:    global_load_ubyte v0, v2, s[34:35]
5653; GFX9-NEXT:    global_load_dword v1, v2, s[34:35] offset:4
5654; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5655; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5656; GFX9-NEXT:    s_getpc_b64 s[34:35]
5657; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4
5658; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12
5659; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5660; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5661; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5662; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5663; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5664; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5665; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5666; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5667; GFX9-NEXT:    s_waitcnt vmcnt(0)
5668; GFX9-NEXT:    s_setpc_b64 s[30:31]
5669;
5670; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
5671; GFX10:       ; %bb.0:
5672; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5673; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5674; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5675; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
5676; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5677; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5678; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
5679; GFX10-NEXT:    v_mov_b32_e32 v2, 0
5680; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5681; GFX10-NEXT:    s_mov_b32 s33, s32
5682; GFX10-NEXT:    s_addk_i32 s32, 0x200
5683; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
5684; GFX10-NEXT:    s_clause 0x1
5685; GFX10-NEXT:    global_load_ubyte v0, v2, s[34:35]
5686; GFX10-NEXT:    global_load_dword v1, v2, s[34:35] offset:4
5687; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5688; GFX10-NEXT:    s_getpc_b64 s[34:35]
5689; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4
5690; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12
5691; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5692; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5693; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5694; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5695; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5696; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5697; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5698; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
5699; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5700; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5701; GFX10-NEXT:    s_waitcnt vmcnt(0)
5702; GFX10-NEXT:    s_setpc_b64 s[30:31]
5703;
5704; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
5705; GFX11:       ; %bb.0:
5706; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5707; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5708; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5709; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
5710; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5711; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
5712; GFX11-NEXT:    v_mov_b32_e32 v1, 0
5713; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5714; GFX11-NEXT:    s_mov_b32 s33, s32
5715; GFX11-NEXT:    s_add_i32 s32, s32, 16
5716; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5717; GFX11-NEXT:    s_clause 0x1
5718; GFX11-NEXT:    global_load_u8 v0, v1, s[0:1]
5719; GFX11-NEXT:    global_load_b32 v1, v1, s[0:1] offset:4
5720; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5721; GFX11-NEXT:    s_getpc_b64 s[0:1]
5722; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4
5723; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
5724; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5725; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5726; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5727; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5728; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5729; GFX11-NEXT:    s_add_i32 s32, s32, -16
5730; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5731; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5732; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
5733; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5734; GFX11-NEXT:    s_waitcnt vmcnt(0)
5735; GFX11-NEXT:    s_setpc_b64 s[30:31]
5736;
5737; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
5738; GFX10-SCRATCH:       ; %bb.0:
5739; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5740; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5741; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5742; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
5743; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5744; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5745; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5746; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
5747; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5748; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5749; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5750; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
5751; GFX10-SCRATCH-NEXT:    s_clause 0x1
5752; GFX10-SCRATCH-NEXT:    global_load_ubyte v0, v2, s[0:1]
5753; GFX10-SCRATCH-NEXT:    global_load_dword v1, v2, s[0:1] offset:4
5754; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5755; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5756; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4
5757; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
5758; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5759; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5760; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5761; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5762; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5763; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5764; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5765; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
5766; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5767; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5768; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5769; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; The address of the struct is read through an undef addrspace(4) pointer;
  ; the struct value is then loaded from addrspace(1).
5770  %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
5771  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
  ; The whole aggregate is passed as a single first-class by-value argument.
5772  call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
5773  ret void
5774}
5775
; Passes a stack-allocated { i8, i32 } to an amdgpu_gfx callee through a
; byval pointer argument.
; The expected output stores the constants 3 (byte) and 8 (dword, offset 4)
; relative to s33 and derives v0 from s33 before the call: s33 shifted right
; by 6 in the gfx900 run, by 5 in the default gfx1010 run, and a plain copy of
; s33 in the gfx1100 and flat-scratch gfx1010 runs.
; NOTE(review): the callee declaration is not visible in this part of the
; file - presumably it is declared alongside the other external_void_func_*.
5776define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
5777; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
5778; GFX9:       ; %bb.0:
5779; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5780; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5781; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
5782; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5783; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5784; GFX9-NEXT:    s_mov_b32 s33, s32
5785; GFX9-NEXT:    v_mov_b32_e32 v0, 3
5786; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s33
5787; GFX9-NEXT:    v_mov_b32_e32 v0, 8
5788; GFX9-NEXT:    s_addk_i32 s32, 0x400
5789; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5790; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
5791; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
5792; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5793; GFX9-NEXT:    s_getpc_b64 s[34:35]
5794; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4
5795; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12
5796; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5797; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5798; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5799; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
5800; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5801; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5802; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
5803; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5804; GFX9-NEXT:    s_waitcnt vmcnt(0)
5805; GFX9-NEXT:    s_setpc_b64 s[30:31]
5806;
5807; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
5808; GFX10:       ; %bb.0:
5809; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5810; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5811; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5812; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
5813; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5814; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5815; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5816; GFX10-NEXT:    v_mov_b32_e32 v0, 3
5817; GFX10-NEXT:    v_mov_b32_e32 v1, 8
5818; GFX10-NEXT:    s_mov_b32 s33, s32
5819; GFX10-NEXT:    s_addk_i32 s32, 0x200
5820; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5821; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s33
5822; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4
5823; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
5824; GFX10-NEXT:    s_getpc_b64 s[34:35]
5825; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4
5826; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12
5827; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5828; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5829; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5830; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5831; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
5832; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5833; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5834; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
5835; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5836; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5837; GFX10-NEXT:    s_waitcnt vmcnt(0)
5838; GFX10-NEXT:    s_setpc_b64 s[30:31]
5839;
5840; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
5841; GFX11:       ; %bb.0:
5842; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5843; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5844; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5845; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill
5846; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5847; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
5848; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
5849; GFX11-NEXT:    s_mov_b32 s33, s32
5850; GFX11-NEXT:    s_add_i32 s32, s32, 16
5851; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
5852; GFX11-NEXT:    s_clause 0x1
5853; GFX11-NEXT:    scratch_store_b8 off, v0, s33
5854; GFX11-NEXT:    scratch_store_b32 off, v1, s33 offset:4
5855; GFX11-NEXT:    v_mov_b32_e32 v0, s33
5856; GFX11-NEXT:    s_getpc_b64 s[0:1]
5857; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4
5858; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
5859; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
5860; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5861; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5862; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
5863; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
5864; GFX11-NEXT:    s_add_i32 s32, s32, -16
5865; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
5866; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
5867; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload
5868; GFX11-NEXT:    s_mov_b32 exec_lo, s0
5869; GFX11-NEXT:    s_waitcnt vmcnt(0)
5870; GFX11-NEXT:    s_setpc_b64 s[30:31]
5871;
5872; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
5873; GFX10-SCRATCH:       ; %bb.0:
5874; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5875; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
5876; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5877; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
5878; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5879; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5880; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
5881; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 3
5882; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 8
5883; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
5884; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
5885; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
5886; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s33
5887; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v1, s33 offset:4
5888; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, s33
5889; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
5890; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4
5891; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
5892; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
5893; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
5894; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
5895; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
5896; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
5897; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
5898; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
5899; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
5900; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
5901; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
5902; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
5903; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; Build the argument in an addrspace(5) alloca: field 0 = i8 3, field 1 = i32 8.
5904  %val = alloca { i8, i32 }, align 4, addrspace(5)
5905  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
5906  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
5907  store i8 3, i8 addrspace(5)* %gep0
5908  store i32 8, i32 addrspace(5)* %gep1
  ; The alloca pointer is passed with the byval({ i8, i32 }) attribute.
5909  call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val)
5910  ret void
5911}
5912
; Calls an amdgpu_gfx callee that takes an sret { i8, i32 } result slot and a
; byval { i8, i32 } input, both allocated in addrspace(5) by this function.
; After the call the two fields of the result slot are reloaded and written
; out with volatile stores. The function's own unnamed i32 parameter is not
; referenced by the body.
; In the expected output the byval slot is addressed directly off s33 (stores
; at offset 0 and 4) and the sret slot 8 bytes further on (reloads at s33
; offset:8 and offset:12); v0 carries the s33-derived address plus 8 and v1
; the s33-derived address itself.
; NOTE(review): the callee declaration is not visible in this part of the
; file - presumably it is declared alongside the other external_void_func_*.
5913define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
5914; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5915; GFX9:       ; %bb.0:
5916; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5917; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5918; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
5919; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5920; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
5921; GFX9-NEXT:    s_mov_b32 s33, s32
5922; GFX9-NEXT:    v_mov_b32_e32 v0, 3
5923; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s33
5924; GFX9-NEXT:    v_mov_b32_e32 v0, 8
5925; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
5926; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
5927; GFX9-NEXT:    s_addk_i32 s32, 0x800
5928; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5929; GFX9-NEXT:    v_add_u32_e32 v0, 8, v0
5930; GFX9-NEXT:    v_lshrrev_b32_e64 v1, 6, s33
5931; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
5932; GFX9-NEXT:    s_getpc_b64 s[34:35]
5933; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5934; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5935; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5936; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], s33 offset:8
5937; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:12
5938; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
5939; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
5940; GFX9-NEXT:    s_addk_i32 s32, 0xf800
5941; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
5942; GFX9-NEXT:    s_waitcnt vmcnt(0)
5943; GFX9-NEXT:    global_store_byte v[0:1], v0, off
5944; GFX9-NEXT:    s_waitcnt vmcnt(0)
5945; GFX9-NEXT:    global_store_dword v[0:1], v1, off
5946; GFX9-NEXT:    s_waitcnt vmcnt(0)
5947; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
5948; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
5949; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
5950; GFX9-NEXT:    s_waitcnt vmcnt(0)
5951; GFX9-NEXT:    s_setpc_b64 s[30:31]
5952;
5953; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5954; GFX10:       ; %bb.0:
5955; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5956; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5957; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5958; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
5959; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5960; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5961; GFX10-NEXT:    v_mov_b32_e32 v0, 3
5962; GFX10-NEXT:    v_mov_b32_e32 v1, 8
5963; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
5964; GFX10-NEXT:    s_mov_b32 s33, s32
5965; GFX10-NEXT:    s_addk_i32 s32, 0x400
5966; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s33
5967; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:4
5968; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
5969; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
5970; GFX10-NEXT:    v_lshrrev_b32_e64 v1, 5, s33
5971; GFX10-NEXT:    s_getpc_b64 s[34:35]
5972; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5973; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5974; GFX10-NEXT:    v_add_nc_u32_e32 v0, 8, v0
5975; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
5976; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
5977; GFX10-NEXT:    s_clause 0x1
5978; GFX10-NEXT:    buffer_load_ubyte v0, off, s[0:3], s33 offset:8
5979; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:12
5980; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
5981; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
5982; GFX10-NEXT:    s_addk_i32 s32, 0xfc00
5983; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
5984; GFX10-NEXT:    s_waitcnt vmcnt(0)
5985; GFX10-NEXT:    global_store_byte v[0:1], v0, off
5986; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5987; GFX10-NEXT:    global_store_dword v[0:1], v1, off
5988; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5989; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
5990; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
5991; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
5992; GFX10-NEXT:    s_mov_b32 exec_lo, s34
5993; GFX10-NEXT:    s_waitcnt vmcnt(0)
5994; GFX10-NEXT:    s_setpc_b64 s[30:31]
5995;
5996; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5997; GFX11:       ; %bb.0:
5998; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5999; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6000; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6001; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:16 ; 4-byte Folded Spill
6002; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6003; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
6004; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
6005; GFX11-NEXT:    s_mov_b32 s33, s32
6006; GFX11-NEXT:    s_add_i32 s32, s32, 32
6007; GFX11-NEXT:    s_getpc_b64 s[0:1]
6008; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
6009; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
6010; GFX11-NEXT:    s_add_i32 vcc_lo, s33, 8
6011; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6012; GFX11-NEXT:    s_clause 0x1
6013; GFX11-NEXT:    scratch_store_b8 off, v0, s33
6014; GFX11-NEXT:    scratch_store_b32 off, v1, s33 offset:4
6015; GFX11-NEXT:    v_dual_mov_b32 v0, vcc_lo :: v_dual_mov_b32 v1, s33
6016; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6017; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6018; GFX11-NEXT:    s_clause 0x1
6019; GFX11-NEXT:    scratch_load_u8 v0, off, s33 offset:8
6020; GFX11-NEXT:    scratch_load_b32 v1, off, s33 offset:12
6021; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6022; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6023; GFX11-NEXT:    s_addk_i32 s32, 0xffe0
6024; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
6025; GFX11-NEXT:    s_waitcnt vmcnt(0)
6026; GFX11-NEXT:    global_store_b8 v[0:1], v0, off dlc
6027; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6028; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
6029; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6030; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6031; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:16 ; 4-byte Folded Reload
6032; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6033; GFX11-NEXT:    s_waitcnt vmcnt(0)
6034; GFX11-NEXT:    s_setpc_b64 s[30:31]
6035;
6036; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
6037; GFX10-SCRATCH:       ; %bb.0:
6038; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6039; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6040; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6041; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill
6042; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6043; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6044; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
6045; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 3
6046; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
6047; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 32
6048; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 8
6049; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
6050; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
6051; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
6052; GFX10-SCRATCH-NEXT:    s_add_i32 vcc_lo, s33, 8
6053; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
6054; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s33
6055; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v1, s33 offset:4
6056; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, vcc_lo
6057; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, s33
6058; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
6059; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6060; GFX10-SCRATCH-NEXT:    s_clause 0x1
6061; GFX10-SCRATCH-NEXT:    scratch_load_ubyte v0, off, s33 offset:8
6062; GFX10-SCRATCH-NEXT:    scratch_load_dword v1, off, s33 offset:12
6063; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
6064; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
6065; GFX10-SCRATCH-NEXT:    s_addk_i32 s32, 0xffe0
6066; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
6067; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6068; GFX10-SCRATCH-NEXT:    global_store_byte v[0:1], v0, off
6069; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6070; GFX10-SCRATCH-NEXT:    global_store_dword v[0:1], v1, off
6071; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6072; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6073; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload
6074; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6075; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6076; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6077; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  ; Two addrspace(5) slots: %in.val is the byval input, %out.val the sret result.
6078  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
6079  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
  ; Initialise the input struct to { i8 3, i32 8 }.
6080  %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
6081  %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
6082  store i8 3, i8 addrspace(5)* %in.gep0
6083  store i32 8, i32 addrspace(5)* %in.gep1
  ; First argument is the sret result slot, second is the byval input.
6084  call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val)
  ; Read both fields of the result slot back after the call.
6085  %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
6086  %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
6087  %out.val0 = load i8, i8 addrspace(5)* %out.gep0
6088  %out.val1 = load i32, i32 addrspace(5)* %out.gep1
6089
  ; Volatile stores to undef addrspace(1) addresses consume the reloaded
  ; values - presumably so the reloads are kept in the output.
6090  store volatile i8 %out.val0, i8 addrspace(1)* undef
6091  store volatile i32 %out.val1, i32 addrspace(1)* undef
6092  ret void
6093}
6094
6095define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
6096; GFX9-LABEL: test_call_external_void_func_v16i8:
6097; GFX9:       ; %bb.0:
6098; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6099; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6100; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6101; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6102; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
6103; GFX9-NEXT:    v_mov_b32_e32 v0, 0
6104; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
6105; GFX9-NEXT:    s_mov_b32 s33, s32
6106; GFX9-NEXT:    s_addk_i32 s32, 0x400
6107; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6108; GFX9-NEXT:    global_load_dwordx4 v[0:3], v0, s[34:35]
6109; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
6110; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6111; GFX9-NEXT:    s_getpc_b64 s[34:35]
6112; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4
6113; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12
6114; GFX9-NEXT:    s_waitcnt vmcnt(0)
6115; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
6116; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
6117; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
6118; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
6119; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
6120; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
6121; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
6122; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6123; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
6124; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
6125; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
6126; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
6127; GFX9-NEXT:    v_mov_b32_e32 v4, v1
6128; GFX9-NEXT:    v_mov_b32_e32 v8, v2
6129; GFX9-NEXT:    v_mov_b32_e32 v12, v3
6130; GFX9-NEXT:    v_mov_b32_e32 v1, v16
6131; GFX9-NEXT:    v_mov_b32_e32 v2, v17
6132; GFX9-NEXT:    v_mov_b32_e32 v3, v18
6133; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6134; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6135; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6136; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
6137; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
6138; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6139; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6140; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6141; GFX9-NEXT:    s_waitcnt vmcnt(0)
6142; GFX9-NEXT:    s_setpc_b64 s[30:31]
6143;
6144; GFX10-LABEL: test_call_external_void_func_v16i8:
6145; GFX10:       ; %bb.0:
6146; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6147; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6148; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6149; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6150; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6151; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6152; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
6153; GFX10-NEXT:    v_mov_b32_e32 v0, 0
6154; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
6155; GFX10-NEXT:    s_mov_b32 s33, s32
6156; GFX10-NEXT:    s_addk_i32 s32, 0x200
6157; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
6158; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
6159; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
6160; GFX10-NEXT:    global_load_dwordx4 v[0:3], v0, s[34:35]
6161; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6162; GFX10-NEXT:    s_getpc_b64 s[34:35]
6163; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4
6164; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12
6165; GFX10-NEXT:    s_waitcnt vmcnt(0)
6166; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
6167; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
6168; GFX10-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
6169; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
6170; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
6171; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
6172; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
6173; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6174; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
6175; GFX10-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
6176; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
6177; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
6178; GFX10-NEXT:    v_mov_b32_e32 v4, v1
6179; GFX10-NEXT:    v_mov_b32_e32 v8, v2
6180; GFX10-NEXT:    v_mov_b32_e32 v12, v3
6181; GFX10-NEXT:    v_mov_b32_e32 v1, v16
6182; GFX10-NEXT:    v_mov_b32_e32 v2, v17
6183; GFX10-NEXT:    v_mov_b32_e32 v3, v18
6184; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6185; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
6186; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
6187; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
6188; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
6189; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6190; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6191; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6192; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6193; GFX10-NEXT:    s_waitcnt vmcnt(0)
6194; GFX10-NEXT:    s_setpc_b64 s[30:31]
6195;
6196; GFX11-LABEL: test_call_external_void_func_v16i8:
6197; GFX11:       ; %bb.0:
6198; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6199; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6200; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6201; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
6202; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6203; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
6204; GFX11-NEXT:    v_mov_b32_e32 v0, 0
6205; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
6206; GFX11-NEXT:    s_mov_b32 s33, s32
6207; GFX11-NEXT:    s_add_i32 s32, s32, 16
6208; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6209; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6210; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
6211; GFX11-NEXT:    global_load_b128 v[0:3], v0, s[0:1]
6212; GFX11-NEXT:    s_getpc_b64 s[0:1]
6213; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4
6214; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12
6215; GFX11-NEXT:    s_waitcnt vmcnt(0)
6216; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
6217; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
6218; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
6219; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
6220; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
6221; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
6222; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
6223; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6224; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
6225; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
6226; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
6227; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
6228; GFX11-NEXT:    v_mov_b32_e32 v4, v1
6229; GFX11-NEXT:    v_mov_b32_e32 v8, v2
6230; GFX11-NEXT:    v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
6231; GFX11-NEXT:    v_dual_mov_b32 v1, v16 :: v_dual_mov_b32 v2, v17
6232; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6233; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6234; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6235; GFX11-NEXT:    s_add_i32 s32, s32, -16
6236; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
6237; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6238; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
6239; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6240; GFX11-NEXT:    s_waitcnt vmcnt(0)
6241; GFX11-NEXT:    s_setpc_b64 s[30:31]
6242;
6243; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
6244; GFX10-SCRATCH:       ; %bb.0:
6245; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6246; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6247; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6248; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6249; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6250; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6251; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
6252; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
6253; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
6254; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
6255; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
6256; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
6257; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
6258; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
6259; GFX10-SCRATCH-NEXT:    global_load_dwordx4 v[0:3], v0, s[0:1]
6260; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6261; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
6262; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4
6263; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12
6264; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6265; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
6266; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
6267; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
6268; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
6269; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
6270; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
6271; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
6272; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6273; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
6274; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
6275; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
6276; GFX10-SCRATCH-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
6277; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, v1
6278; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v8, v2
6279; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v12, v3
6280; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, v16
6281; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, v17
6282; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, v18
6283; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6284; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
6285; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
6286; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
6287; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
6288; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6289; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6290; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6291; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6292; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6293; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
6294  %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
6295  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
6296  call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val)
6297  ret void
6298}
6299
; Codegen test for a call marked `tail` to the amdgpu_gfx function
; @byval_align16_f64_arg, made from a caller that is itself declared
; without the amdgpu_gfx calling convention. The callee receives the
; <32 x i32> argument unchanged plus a byval(double) pointer that requests
; align 16, while the alloca backing it is only declared align 8.
; In all four checked configurations the expected output is an ordinary
; s_swappc_b64 call, followed by restoring s30-s63 from lanes of v40 and
; returning with s_setpc_b64.
6300define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
6301; GFX9-LABEL: tail_call_byval_align16:
6302; GFX9:       ; %bb.0: ; %entry
6303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6304; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
6305; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
6306; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
6307; GFX9-NEXT:    v_writelane_b32 v40, s33, 32
6308; GFX9-NEXT:    s_mov_b32 s33, s32
6309; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:20
6310; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:16
6311; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s33
6312; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
6313; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6314; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
6315; GFX9-NEXT:    v_writelane_b32 v40, s35, 3
6316; GFX9-NEXT:    v_writelane_b32 v40, s36, 4
6317; GFX9-NEXT:    v_writelane_b32 v40, s37, 5
6318; GFX9-NEXT:    v_writelane_b32 v40, s38, 6
6319; GFX9-NEXT:    v_writelane_b32 v40, s39, 7
6320; GFX9-NEXT:    v_writelane_b32 v40, s40, 8
6321; GFX9-NEXT:    v_writelane_b32 v40, s41, 9
6322; GFX9-NEXT:    v_writelane_b32 v40, s42, 10
6323; GFX9-NEXT:    v_writelane_b32 v40, s43, 11
6324; GFX9-NEXT:    v_writelane_b32 v40, s44, 12
6325; GFX9-NEXT:    v_writelane_b32 v40, s45, 13
6326; GFX9-NEXT:    v_writelane_b32 v40, s46, 14
6327; GFX9-NEXT:    v_writelane_b32 v40, s47, 15
6328; GFX9-NEXT:    v_writelane_b32 v40, s48, 16
6329; GFX9-NEXT:    v_writelane_b32 v40, s49, 17
6330; GFX9-NEXT:    v_writelane_b32 v40, s50, 18
6331; GFX9-NEXT:    v_writelane_b32 v40, s51, 19
6332; GFX9-NEXT:    v_writelane_b32 v40, s52, 20
6333; GFX9-NEXT:    v_writelane_b32 v40, s53, 21
6334; GFX9-NEXT:    v_writelane_b32 v40, s54, 22
6335; GFX9-NEXT:    v_writelane_b32 v40, s55, 23
6336; GFX9-NEXT:    v_writelane_b32 v40, s56, 24
6337; GFX9-NEXT:    v_writelane_b32 v40, s57, 25
6338; GFX9-NEXT:    v_writelane_b32 v40, s58, 26
6339; GFX9-NEXT:    v_writelane_b32 v40, s59, 27
6340; GFX9-NEXT:    v_writelane_b32 v40, s60, 28
6341; GFX9-NEXT:    v_writelane_b32 v40, s61, 29
6342; GFX9-NEXT:    s_addk_i32 s32, 0x800
6343; GFX9-NEXT:    v_writelane_b32 v40, s62, 30
6344; GFX9-NEXT:    v_writelane_b32 v40, s63, 31
6345; GFX9-NEXT:    s_getpc_b64 s[4:5]
6346; GFX9-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
6347; GFX9-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
6348; GFX9-NEXT:    s_waitcnt vmcnt(2)
6349; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
6350; GFX9-NEXT:    s_waitcnt vmcnt(2)
6351; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32
6352; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6353; GFX9-NEXT:    v_readlane_b32 s63, v40, 31
6354; GFX9-NEXT:    v_readlane_b32 s62, v40, 30
6355; GFX9-NEXT:    v_readlane_b32 s61, v40, 29
6356; GFX9-NEXT:    v_readlane_b32 s60, v40, 28
6357; GFX9-NEXT:    v_readlane_b32 s59, v40, 27
6358; GFX9-NEXT:    v_readlane_b32 s58, v40, 26
6359; GFX9-NEXT:    v_readlane_b32 s57, v40, 25
6360; GFX9-NEXT:    v_readlane_b32 s56, v40, 24
6361; GFX9-NEXT:    v_readlane_b32 s55, v40, 23
6362; GFX9-NEXT:    v_readlane_b32 s54, v40, 22
6363; GFX9-NEXT:    v_readlane_b32 s53, v40, 21
6364; GFX9-NEXT:    v_readlane_b32 s52, v40, 20
6365; GFX9-NEXT:    v_readlane_b32 s51, v40, 19
6366; GFX9-NEXT:    v_readlane_b32 s50, v40, 18
6367; GFX9-NEXT:    v_readlane_b32 s49, v40, 17
6368; GFX9-NEXT:    v_readlane_b32 s48, v40, 16
6369; GFX9-NEXT:    v_readlane_b32 s47, v40, 15
6370; GFX9-NEXT:    v_readlane_b32 s46, v40, 14
6371; GFX9-NEXT:    v_readlane_b32 s45, v40, 13
6372; GFX9-NEXT:    v_readlane_b32 s44, v40, 12
6373; GFX9-NEXT:    v_readlane_b32 s43, v40, 11
6374; GFX9-NEXT:    v_readlane_b32 s42, v40, 10
6375; GFX9-NEXT:    v_readlane_b32 s41, v40, 9
6376; GFX9-NEXT:    v_readlane_b32 s40, v40, 8
6377; GFX9-NEXT:    v_readlane_b32 s39, v40, 7
6378; GFX9-NEXT:    v_readlane_b32 s38, v40, 6
6379; GFX9-NEXT:    v_readlane_b32 s37, v40, 5
6380; GFX9-NEXT:    v_readlane_b32 s36, v40, 4
6381; GFX9-NEXT:    v_readlane_b32 s35, v40, 3
6382; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
6383; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6384; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6385; GFX9-NEXT:    s_addk_i32 s32, 0xf800
6386; GFX9-NEXT:    v_readlane_b32 s33, v40, 32
6387; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
6388; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
6389; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
6390; GFX9-NEXT:    s_waitcnt vmcnt(0)
6391; GFX9-NEXT:    s_setpc_b64 s[30:31]
6392;
6393; GFX10-LABEL: tail_call_byval_align16:
6394; GFX10:       ; %bb.0: ; %entry
6395; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6396; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6397; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
6398; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
6399; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6400; GFX10-NEXT:    s_mov_b32 exec_lo, s4
6401; GFX10-NEXT:    s_mov_b32 s6, s33
6402; GFX10-NEXT:    s_mov_b32 s33, s32
6403; GFX10-NEXT:    s_clause 0x2
6404; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:20
6405; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:16
6406; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s33
6407; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
6408; GFX10-NEXT:    s_addk_i32 s32, 0x400
6409; GFX10-NEXT:    s_getpc_b64 s[4:5]
6410; GFX10-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
6411; GFX10-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
6412; GFX10-NEXT:    s_waitcnt vmcnt(2)
6413; GFX10-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
6414; GFX10-NEXT:    s_waitcnt vmcnt(1)
6415; GFX10-NEXT:    buffer_store_dword v33, off, s[0:3], s32
6416; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
6417; GFX10-NEXT:    v_writelane_b32 v40, s34, 2
6418; GFX10-NEXT:    v_writelane_b32 v40, s35, 3
6419; GFX10-NEXT:    v_writelane_b32 v40, s36, 4
6420; GFX10-NEXT:    v_writelane_b32 v40, s37, 5
6421; GFX10-NEXT:    v_writelane_b32 v40, s38, 6
6422; GFX10-NEXT:    v_writelane_b32 v40, s39, 7
6423; GFX10-NEXT:    v_writelane_b32 v40, s40, 8
6424; GFX10-NEXT:    v_writelane_b32 v40, s41, 9
6425; GFX10-NEXT:    v_writelane_b32 v40, s42, 10
6426; GFX10-NEXT:    v_writelane_b32 v40, s43, 11
6427; GFX10-NEXT:    v_writelane_b32 v40, s44, 12
6428; GFX10-NEXT:    v_writelane_b32 v40, s45, 13
6429; GFX10-NEXT:    v_writelane_b32 v40, s46, 14
6430; GFX10-NEXT:    v_writelane_b32 v40, s47, 15
6431; GFX10-NEXT:    v_writelane_b32 v40, s48, 16
6432; GFX10-NEXT:    v_writelane_b32 v40, s49, 17
6433; GFX10-NEXT:    v_writelane_b32 v40, s50, 18
6434; GFX10-NEXT:    v_writelane_b32 v40, s51, 19
6435; GFX10-NEXT:    v_writelane_b32 v40, s52, 20
6436; GFX10-NEXT:    v_writelane_b32 v40, s53, 21
6437; GFX10-NEXT:    v_writelane_b32 v40, s54, 22
6438; GFX10-NEXT:    v_writelane_b32 v40, s55, 23
6439; GFX10-NEXT:    v_writelane_b32 v40, s56, 24
6440; GFX10-NEXT:    v_writelane_b32 v40, s57, 25
6441; GFX10-NEXT:    v_writelane_b32 v40, s58, 26
6442; GFX10-NEXT:    v_writelane_b32 v40, s59, 27
6443; GFX10-NEXT:    v_writelane_b32 v40, s60, 28
6444; GFX10-NEXT:    v_writelane_b32 v40, s61, 29
6445; GFX10-NEXT:    v_writelane_b32 v40, s62, 30
6446; GFX10-NEXT:    v_writelane_b32 v40, s63, 31
6447; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6448; GFX10-NEXT:    v_readlane_b32 s63, v40, 31
6449; GFX10-NEXT:    v_readlane_b32 s62, v40, 30
6450; GFX10-NEXT:    v_readlane_b32 s61, v40, 29
6451; GFX10-NEXT:    v_readlane_b32 s60, v40, 28
6452; GFX10-NEXT:    v_readlane_b32 s59, v40, 27
6453; GFX10-NEXT:    v_readlane_b32 s58, v40, 26
6454; GFX10-NEXT:    v_readlane_b32 s57, v40, 25
6455; GFX10-NEXT:    v_readlane_b32 s56, v40, 24
6456; GFX10-NEXT:    v_readlane_b32 s55, v40, 23
6457; GFX10-NEXT:    v_readlane_b32 s54, v40, 22
6458; GFX10-NEXT:    v_readlane_b32 s53, v40, 21
6459; GFX10-NEXT:    v_readlane_b32 s52, v40, 20
6460; GFX10-NEXT:    v_readlane_b32 s51, v40, 19
6461; GFX10-NEXT:    v_readlane_b32 s50, v40, 18
6462; GFX10-NEXT:    v_readlane_b32 s49, v40, 17
6463; GFX10-NEXT:    v_readlane_b32 s48, v40, 16
6464; GFX10-NEXT:    v_readlane_b32 s47, v40, 15
6465; GFX10-NEXT:    v_readlane_b32 s46, v40, 14
6466; GFX10-NEXT:    v_readlane_b32 s45, v40, 13
6467; GFX10-NEXT:    v_readlane_b32 s44, v40, 12
6468; GFX10-NEXT:    v_readlane_b32 s43, v40, 11
6469; GFX10-NEXT:    v_readlane_b32 s42, v40, 10
6470; GFX10-NEXT:    v_readlane_b32 s41, v40, 9
6471; GFX10-NEXT:    v_readlane_b32 s40, v40, 8
6472; GFX10-NEXT:    v_readlane_b32 s39, v40, 7
6473; GFX10-NEXT:    v_readlane_b32 s38, v40, 6
6474; GFX10-NEXT:    v_readlane_b32 s37, v40, 5
6475; GFX10-NEXT:    v_readlane_b32 s36, v40, 4
6476; GFX10-NEXT:    v_readlane_b32 s35, v40, 3
6477; GFX10-NEXT:    v_readlane_b32 s34, v40, 2
6478; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
6479; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
6480; GFX10-NEXT:    s_addk_i32 s32, 0xfc00
6481; GFX10-NEXT:    s_mov_b32 s33, s6
6482; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
6483; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
6484; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6485; GFX10-NEXT:    s_mov_b32 exec_lo, s4
6486; GFX10-NEXT:    s_waitcnt vmcnt(0)
6487; GFX10-NEXT:    s_setpc_b64 s[30:31]
6488;
6489; GFX11-LABEL: tail_call_byval_align16:
6490; GFX11:       ; %bb.0: ; %entry
6491; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6492; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6493; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6494; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:24 ; 4-byte Folded Spill
6495; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6496; GFX11-NEXT:    s_mov_b32 s4, s33
6497; GFX11-NEXT:    s_mov_b32 s33, s32
6498; GFX11-NEXT:    s_clause 0x1
6499; GFX11-NEXT:    scratch_load_b64 v[32:33], off, s33 offset:16
6500; GFX11-NEXT:    scratch_load_b32 v31, off, s33
6501; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6502; GFX11-NEXT:    s_add_i32 s32, s32, 32
6503; GFX11-NEXT:    s_getpc_b64 s[0:1]
6504; GFX11-NEXT:    s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
6505; GFX11-NEXT:    s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
6506; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6507; GFX11-NEXT:    v_writelane_b32 v40, s34, 2
6508; GFX11-NEXT:    v_writelane_b32 v40, s35, 3
6509; GFX11-NEXT:    v_writelane_b32 v40, s36, 4
6510; GFX11-NEXT:    v_writelane_b32 v40, s37, 5
6511; GFX11-NEXT:    v_writelane_b32 v40, s38, 6
6512; GFX11-NEXT:    v_writelane_b32 v40, s39, 7
6513; GFX11-NEXT:    v_writelane_b32 v40, s40, 8
6514; GFX11-NEXT:    v_writelane_b32 v40, s41, 9
6515; GFX11-NEXT:    v_writelane_b32 v40, s42, 10
6516; GFX11-NEXT:    v_writelane_b32 v40, s43, 11
6517; GFX11-NEXT:    v_writelane_b32 v40, s44, 12
6518; GFX11-NEXT:    v_writelane_b32 v40, s45, 13
6519; GFX11-NEXT:    v_writelane_b32 v40, s46, 14
6520; GFX11-NEXT:    v_writelane_b32 v40, s47, 15
6521; GFX11-NEXT:    v_writelane_b32 v40, s48, 16
6522; GFX11-NEXT:    v_writelane_b32 v40, s49, 17
6523; GFX11-NEXT:    v_writelane_b32 v40, s50, 18
6524; GFX11-NEXT:    v_writelane_b32 v40, s51, 19
6525; GFX11-NEXT:    v_writelane_b32 v40, s52, 20
6526; GFX11-NEXT:    v_writelane_b32 v40, s53, 21
6527; GFX11-NEXT:    v_writelane_b32 v40, s54, 22
6528; GFX11-NEXT:    v_writelane_b32 v40, s55, 23
6529; GFX11-NEXT:    v_writelane_b32 v40, s56, 24
6530; GFX11-NEXT:    v_writelane_b32 v40, s57, 25
6531; GFX11-NEXT:    v_writelane_b32 v40, s58, 26
6532; GFX11-NEXT:    v_writelane_b32 v40, s59, 27
6533; GFX11-NEXT:    v_writelane_b32 v40, s60, 28
6534; GFX11-NEXT:    v_writelane_b32 v40, s61, 29
6535; GFX11-NEXT:    v_writelane_b32 v40, s62, 30
6536; GFX11-NEXT:    v_writelane_b32 v40, s63, 31
6537; GFX11-NEXT:    s_waitcnt vmcnt(1)
6538; GFX11-NEXT:    scratch_store_b64 off, v[32:33], s32
6539; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6540; GFX11-NEXT:    v_readlane_b32 s63, v40, 31
6541; GFX11-NEXT:    v_readlane_b32 s62, v40, 30
6542; GFX11-NEXT:    v_readlane_b32 s61, v40, 29
6543; GFX11-NEXT:    v_readlane_b32 s60, v40, 28
6544; GFX11-NEXT:    v_readlane_b32 s59, v40, 27
6545; GFX11-NEXT:    v_readlane_b32 s58, v40, 26
6546; GFX11-NEXT:    v_readlane_b32 s57, v40, 25
6547; GFX11-NEXT:    v_readlane_b32 s56, v40, 24
6548; GFX11-NEXT:    v_readlane_b32 s55, v40, 23
6549; GFX11-NEXT:    v_readlane_b32 s54, v40, 22
6550; GFX11-NEXT:    v_readlane_b32 s53, v40, 21
6551; GFX11-NEXT:    v_readlane_b32 s52, v40, 20
6552; GFX11-NEXT:    v_readlane_b32 s51, v40, 19
6553; GFX11-NEXT:    v_readlane_b32 s50, v40, 18
6554; GFX11-NEXT:    v_readlane_b32 s49, v40, 17
6555; GFX11-NEXT:    v_readlane_b32 s48, v40, 16
6556; GFX11-NEXT:    v_readlane_b32 s47, v40, 15
6557; GFX11-NEXT:    v_readlane_b32 s46, v40, 14
6558; GFX11-NEXT:    v_readlane_b32 s45, v40, 13
6559; GFX11-NEXT:    v_readlane_b32 s44, v40, 12
6560; GFX11-NEXT:    v_readlane_b32 s43, v40, 11
6561; GFX11-NEXT:    v_readlane_b32 s42, v40, 10
6562; GFX11-NEXT:    v_readlane_b32 s41, v40, 9
6563; GFX11-NEXT:    v_readlane_b32 s40, v40, 8
6564; GFX11-NEXT:    v_readlane_b32 s39, v40, 7
6565; GFX11-NEXT:    v_readlane_b32 s38, v40, 6
6566; GFX11-NEXT:    v_readlane_b32 s37, v40, 5
6567; GFX11-NEXT:    v_readlane_b32 s36, v40, 4
6568; GFX11-NEXT:    v_readlane_b32 s35, v40, 3
6569; GFX11-NEXT:    v_readlane_b32 s34, v40, 2
6570; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6571; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6572; GFX11-NEXT:    s_addk_i32 s32, 0xffe0
6573; GFX11-NEXT:    s_mov_b32 s33, s4
6574; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6575; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:24 ; 4-byte Folded Reload
6576; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6577; GFX11-NEXT:    s_waitcnt vmcnt(0)
6578; GFX11-NEXT:    s_setpc_b64 s[30:31]
6579;
6580; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
6581; GFX10-SCRATCH:       ; %bb.0: ; %entry
6582; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6583; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6584; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6585; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill
6586; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6587; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6588; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, s33
6589; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
6590; GFX10-SCRATCH-NEXT:    s_clause 0x1
6591; GFX10-SCRATCH-NEXT:    scratch_load_dwordx2 v[32:33], off, s33 offset:16
6592; GFX10-SCRATCH-NEXT:    scratch_load_dword v31, off, s33
6593; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
6594; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 32
6595; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
6596; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
6597; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
6598; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
6599; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s34, 2
6600; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s35, 3
6601; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s36, 4
6602; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s37, 5
6603; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s38, 6
6604; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s39, 7
6605; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s40, 8
6606; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s41, 9
6607; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s42, 10
6608; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s43, 11
6609; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s44, 12
6610; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s45, 13
6611; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s46, 14
6612; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s47, 15
6613; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s48, 16
6614; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s49, 17
6615; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s50, 18
6616; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s51, 19
6617; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s52, 20
6618; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s53, 21
6619; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s54, 22
6620; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s55, 23
6621; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s56, 24
6622; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s57, 25
6623; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s58, 26
6624; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s59, 27
6625; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s60, 28
6626; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s61, 29
6627; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s62, 30
6628; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s63, 31
6629; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(1)
6630; GFX10-SCRATCH-NEXT:    scratch_store_dwordx2 off, v[32:33], s32
6631; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6632; GFX10-SCRATCH-NEXT:    v_readlane_b32 s63, v40, 31
6633; GFX10-SCRATCH-NEXT:    v_readlane_b32 s62, v40, 30
6634; GFX10-SCRATCH-NEXT:    v_readlane_b32 s61, v40, 29
6635; GFX10-SCRATCH-NEXT:    v_readlane_b32 s60, v40, 28
6636; GFX10-SCRATCH-NEXT:    v_readlane_b32 s59, v40, 27
6637; GFX10-SCRATCH-NEXT:    v_readlane_b32 s58, v40, 26
6638; GFX10-SCRATCH-NEXT:    v_readlane_b32 s57, v40, 25
6639; GFX10-SCRATCH-NEXT:    v_readlane_b32 s56, v40, 24
6640; GFX10-SCRATCH-NEXT:    v_readlane_b32 s55, v40, 23
6641; GFX10-SCRATCH-NEXT:    v_readlane_b32 s54, v40, 22
6642; GFX10-SCRATCH-NEXT:    v_readlane_b32 s53, v40, 21
6643; GFX10-SCRATCH-NEXT:    v_readlane_b32 s52, v40, 20
6644; GFX10-SCRATCH-NEXT:    v_readlane_b32 s51, v40, 19
6645; GFX10-SCRATCH-NEXT:    v_readlane_b32 s50, v40, 18
6646; GFX10-SCRATCH-NEXT:    v_readlane_b32 s49, v40, 17
6647; GFX10-SCRATCH-NEXT:    v_readlane_b32 s48, v40, 16
6648; GFX10-SCRATCH-NEXT:    v_readlane_b32 s47, v40, 15
6649; GFX10-SCRATCH-NEXT:    v_readlane_b32 s46, v40, 14
6650; GFX10-SCRATCH-NEXT:    v_readlane_b32 s45, v40, 13
6651; GFX10-SCRATCH-NEXT:    v_readlane_b32 s44, v40, 12
6652; GFX10-SCRATCH-NEXT:    v_readlane_b32 s43, v40, 11
6653; GFX10-SCRATCH-NEXT:    v_readlane_b32 s42, v40, 10
6654; GFX10-SCRATCH-NEXT:    v_readlane_b32 s41, v40, 9
6655; GFX10-SCRATCH-NEXT:    v_readlane_b32 s40, v40, 8
6656; GFX10-SCRATCH-NEXT:    v_readlane_b32 s39, v40, 7
6657; GFX10-SCRATCH-NEXT:    v_readlane_b32 s38, v40, 6
6658; GFX10-SCRATCH-NEXT:    v_readlane_b32 s37, v40, 5
6659; GFX10-SCRATCH-NEXT:    v_readlane_b32 s36, v40, 4
6660; GFX10-SCRATCH-NEXT:    v_readlane_b32 s35, v40, 3
6661; GFX10-SCRATCH-NEXT:    v_readlane_b32 s34, v40, 2
6662; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
6663; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
6664; GFX10-SCRATCH-NEXT:    s_addk_i32 s32, 0xffe0
6665; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s4
6666; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6667; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload
6668; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6669; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6670; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6671; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
6672entry:
6673  %alloca = alloca double, align 8, addrspace(5)
6674  tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca)
6675  ret void
6676}
6677
6678; inreg arguments are passed in SGPRs (the i8 test loads 0x7b into s4); the i1 test instead stores the value to memory as a byte.
; Calls @external_void_func_i1_inreg with the constant `i1 inreg true`.
; In every checked configuration the value 1 is materialized in v0 and
; written as a single byte at the address in s32 (after s32 has been
; advanced) before the s_swappc_b64; no SGPR is loaded with the argument.
6679define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
6680; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
6681; GFX9:       ; %bb.0:
6682; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6683; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6684; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6685; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6686; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
6687; GFX9-NEXT:    s_mov_b32 s33, s32
6688; GFX9-NEXT:    s_addk_i32 s32, 0x400
6689; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
6690; GFX9-NEXT:    v_mov_b32_e32 v0, 1
6691; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6692; GFX9-NEXT:    s_getpc_b64 s[34:35]
6693; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4
6694; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12
6695; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], s32
6696; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6697; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6698; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6699; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
6700; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
6701; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6702; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6703; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6704; GFX9-NEXT:    s_waitcnt vmcnt(0)
6705; GFX9-NEXT:    s_setpc_b64 s[30:31]
6706;
6707; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
6708; GFX10:       ; %bb.0:
6709; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6710; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6711; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6712; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6713; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6714; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6715; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
6716; GFX10-NEXT:    v_mov_b32_e32 v0, 1
6717; GFX10-NEXT:    s_mov_b32 s33, s32
6718; GFX10-NEXT:    s_addk_i32 s32, 0x200
6719; GFX10-NEXT:    s_getpc_b64 s[34:35]
6720; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4
6721; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12
6722; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
6723; GFX10-NEXT:    buffer_store_byte v0, off, s[0:3], s32
6724; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
6725; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6726; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
6727; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
6728; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
6729; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
6730; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6731; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6732; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6733; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6734; GFX10-NEXT:    s_waitcnt vmcnt(0)
6735; GFX10-NEXT:    s_setpc_b64 s[30:31]
6736;
6737; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg:
6738; GFX11:       ; %bb.0:
6739; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6740; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6741; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6742; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
6743; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6744; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
6745; GFX11-NEXT:    v_mov_b32_e32 v0, 1
6746; GFX11-NEXT:    s_mov_b32 s33, s32
6747; GFX11-NEXT:    s_add_i32 s32, s32, 16
6748; GFX11-NEXT:    s_getpc_b64 s[0:1]
6749; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4
6750; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12
6751; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6752; GFX11-NEXT:    scratch_store_b8 off, v0, s32
6753; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6754; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6755; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6756; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6757; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6758; GFX11-NEXT:    s_add_i32 s32, s32, -16
6759; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
6760; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6761; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
6762; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6763; GFX11-NEXT:    s_waitcnt vmcnt(0)
6764; GFX11-NEXT:    s_setpc_b64 s[30:31]
6765;
6766; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
6767; GFX10-SCRATCH:       ; %bb.0:
6768; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6769; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6770; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6771; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6772; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6773; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6774; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
6775; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 1
6776; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
6777; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
6778; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
6779; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4
6780; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12
6781; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
6782; GFX10-SCRATCH-NEXT:    scratch_store_byte off, v0, s32
6783; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
6784; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6785; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
6786; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
6787; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
6788; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
6789; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6790; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6791; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6792; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6793; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6794; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
6795  call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
6796  ret void
6797}
6798
; Checks the code generated for an amdgpu_gfx call that passes the constant
; i8 123 as an inreg argument. For every llc configuration the expected output
; saves s33, s4, s30 and s31 into lanes of v40, materializes the argument with
; "s_movk_i32 s4, 0x7b", performs the call, and then reads the saved registers
; back from v40. The unnamed i32 parameter is not referenced by the body.
6799define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
6800; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
6801; GFX9:       ; %bb.0:
6802; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6803; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6804; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6805; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6806; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
6807; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
6808; GFX9-NEXT:    s_mov_b32 s33, s32
6809; GFX9-NEXT:    s_addk_i32 s32, 0x400
6810; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
6811; GFX9-NEXT:    s_movk_i32 s4, 0x7b
6812; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
6813; GFX9-NEXT:    s_getpc_b64 s[34:35]
6814; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4
6815; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12
6816; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6817; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
6818; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
6819; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
6820; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
6821; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
6822; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6823; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6824; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6825; GFX9-NEXT:    s_waitcnt vmcnt(0)
6826; GFX9-NEXT:    s_setpc_b64 s[30:31]
6827;
6828; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
6829; GFX10:       ; %bb.0:
6830; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6831; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6832; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6833; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6834; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6835; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6836; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
6837; GFX10-NEXT:    s_mov_b32 s33, s32
6838; GFX10-NEXT:    s_addk_i32 s32, 0x200
6839; GFX10-NEXT:    s_getpc_b64 s[34:35]
6840; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4
6841; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12
6842; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
6843; GFX10-NEXT:    s_movk_i32 s4, 0x7b
6844; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
6845; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
6846; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6847; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
6848; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
6849; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
6850; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
6851; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
6852; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6853; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6854; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6855; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6856; GFX10-NEXT:    s_waitcnt vmcnt(0)
6857; GFX10-NEXT:    s_setpc_b64 s[30:31]
6858;
6859; GFX11-LABEL: test_call_external_void_func_i8_imm_inreg:
6860; GFX11:       ; %bb.0:
6861; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6862; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6863; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6864; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
6865; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6866; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
6867; GFX11-NEXT:    s_mov_b32 s33, s32
6868; GFX11-NEXT:    s_add_i32 s32, s32, 16
6869; GFX11-NEXT:    s_getpc_b64 s[0:1]
6870; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
6871; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
6872; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
6873; GFX11-NEXT:    s_movk_i32 s4, 0x7b
6874; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
6875; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
6876; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6877; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6878; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
6879; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
6880; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
6881; GFX11-NEXT:    s_add_i32 s32, s32, -16
6882; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
6883; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6884; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
6885; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6886; GFX11-NEXT:    s_waitcnt vmcnt(0)
6887; GFX11-NEXT:    s_setpc_b64 s[30:31]
6888;
6889; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
6890; GFX10-SCRATCH:       ; %bb.0:
6891; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6892; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
6893; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6894; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
6895; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6896; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6897; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
6898; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
6899; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
6900; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
6901; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
6902; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
6903; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
6904; GFX10-SCRATCH-NEXT:    s_movk_i32 s4, 0x7b
6905; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
6906; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
6907; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6908; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
6909; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
6910; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
6911; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
6912; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
6913; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
6914; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
6915; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
6916; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
6917; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
6918; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them with the script instead of editing.
6919  call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
6920  ret void
6921}
6922
; Checks the code generated for an amdgpu_gfx call that passes the constant
; i16 123 as an inreg argument. For every llc configuration the expected output
; saves s33, s4, s30 and s31 into lanes of v40, materializes the argument with
; "s_movk_i32 s4, 0x7b", performs the call, and then reads the saved registers
; back from v40.
6923define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
6924; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
6925; GFX9:       ; %bb.0:
6926; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6927; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6928; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6929; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6930; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
6931; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
6932; GFX9-NEXT:    s_mov_b32 s33, s32
6933; GFX9-NEXT:    s_addk_i32 s32, 0x400
6934; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
6935; GFX9-NEXT:    s_movk_i32 s4, 0x7b
6936; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
6937; GFX9-NEXT:    s_getpc_b64 s[34:35]
6938; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4
6939; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12
6940; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6941; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
6942; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
6943; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
6944; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
6945; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
6946; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
6947; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6948; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
6949; GFX9-NEXT:    s_waitcnt vmcnt(0)
6950; GFX9-NEXT:    s_setpc_b64 s[30:31]
6951;
6952; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
6953; GFX10:       ; %bb.0:
6954; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6955; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6956; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6957; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
6958; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6959; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6960; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
6961; GFX10-NEXT:    s_mov_b32 s33, s32
6962; GFX10-NEXT:    s_addk_i32 s32, 0x200
6963; GFX10-NEXT:    s_getpc_b64 s[34:35]
6964; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4
6965; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12
6966; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
6967; GFX10-NEXT:    s_movk_i32 s4, 0x7b
6968; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
6969; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
6970; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
6971; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
6972; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
6973; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
6974; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
6975; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
6976; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
6977; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
6978; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
6979; GFX10-NEXT:    s_mov_b32 exec_lo, s34
6980; GFX10-NEXT:    s_waitcnt vmcnt(0)
6981; GFX10-NEXT:    s_setpc_b64 s[30:31]
6982;
6983; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg:
6984; GFX11:       ; %bb.0:
6985; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6986; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6987; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
6988; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
6989; GFX11-NEXT:    s_mov_b32 exec_lo, s0
6990; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
6991; GFX11-NEXT:    s_mov_b32 s33, s32
6992; GFX11-NEXT:    s_add_i32 s32, s32, 16
6993; GFX11-NEXT:    s_getpc_b64 s[0:1]
6994; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
6995; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
6996; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
6997; GFX11-NEXT:    s_movk_i32 s4, 0x7b
6998; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
6999; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
7000; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7001; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7002; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
7003; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
7004; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7005; GFX11-NEXT:    s_add_i32 s32, s32, -16
7006; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
7007; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7008; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7009; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7010; GFX11-NEXT:    s_waitcnt vmcnt(0)
7011; GFX11-NEXT:    s_setpc_b64 s[30:31]
7012;
7013; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
7014; GFX10-SCRATCH:       ; %bb.0:
7015; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7016; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7017; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7018; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7019; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7020; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7021; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
7022; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7023; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7024; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7025; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
7026; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
7027; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7028; GFX10-SCRATCH-NEXT:    s_movk_i32 s4, 0x7b
7029; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
7030; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
7031; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7032; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
7033; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
7034; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7035; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7036; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
7037; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7038; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7039; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7040; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7041; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7042; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them with the script instead of editing.
7043  call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
7044  ret void
7045}
7046
; Checks the code generated for an amdgpu_gfx call that passes the constant
; i32 42 as an inreg argument. For every llc configuration the expected output
; saves s33, s4, s30 and s31 into lanes of v40, materializes the argument with
; "s_mov_b32 s4, 42", performs the call, and then reads the saved registers
; back from v40. The unnamed i32 parameter is not referenced by the body.
7047define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
7048; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
7049; GFX9:       ; %bb.0:
7050; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7051; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7052; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7053; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7054; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
7055; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7056; GFX9-NEXT:    s_mov_b32 s33, s32
7057; GFX9-NEXT:    s_addk_i32 s32, 0x400
7058; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
7059; GFX9-NEXT:    s_mov_b32 s4, 42
7060; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
7061; GFX9-NEXT:    s_getpc_b64 s[34:35]
7062; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4
7063; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12
7064; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7065; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
7066; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
7067; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7068; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7069; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
7070; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7071; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7072; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7073; GFX9-NEXT:    s_waitcnt vmcnt(0)
7074; GFX9-NEXT:    s_setpc_b64 s[30:31]
7075;
7076; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
7077; GFX10:       ; %bb.0:
7078; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7079; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7080; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7081; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7082; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7083; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7084; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
7085; GFX10-NEXT:    s_mov_b32 s33, s32
7086; GFX10-NEXT:    s_addk_i32 s32, 0x200
7087; GFX10-NEXT:    s_getpc_b64 s[34:35]
7088; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4
7089; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12
7090; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7091; GFX10-NEXT:    s_mov_b32 s4, 42
7092; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
7093; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
7094; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7095; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
7096; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
7097; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7098; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7099; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
7100; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7101; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7102; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7103; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7104; GFX10-NEXT:    s_waitcnt vmcnt(0)
7105; GFX10-NEXT:    s_setpc_b64 s[30:31]
7106;
7107; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg:
7108; GFX11:       ; %bb.0:
7109; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7110; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7111; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7112; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7113; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7114; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
7115; GFX11-NEXT:    s_mov_b32 s33, s32
7116; GFX11-NEXT:    s_add_i32 s32, s32, 16
7117; GFX11-NEXT:    s_getpc_b64 s[0:1]
7118; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
7119; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
7120; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7121; GFX11-NEXT:    s_mov_b32 s4, 42
7122; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
7123; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
7124; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7125; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7126; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
7127; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
7128; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7129; GFX11-NEXT:    s_add_i32 s32, s32, -16
7130; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
7131; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7132; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7133; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7134; GFX11-NEXT:    s_waitcnt vmcnt(0)
7135; GFX11-NEXT:    s_setpc_b64 s[30:31]
7136;
7137; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
7138; GFX10-SCRATCH:       ; %bb.0:
7139; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7140; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7141; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7142; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7143; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7144; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7145; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
7146; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7147; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7148; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7149; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
7150; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
7151; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7152; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 42
7153; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
7154; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
7155; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7156; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
7157; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
7158; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7159; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7160; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
7161; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7162; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7163; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7164; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7165; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7166; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them with the script instead of editing.
7167  call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
7168  ret void
7169}
7170
; Checks the code generated for an amdgpu_gfx call that passes the constant
; i64 123 as an inreg argument. For every llc configuration the expected output
; saves s33, s4, s5, s30 and s31 into lanes of v40, materializes the argument
; as the pair s4 = 0x7b ("s_movk_i32") and s5 = 0 ("s_mov_b32"), performs the
; call, and then reads the saved registers back from v40.
7171define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
7172; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
7173; GFX9:       ; %bb.0:
7174; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7175; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7176; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7177; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7178; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
7179; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7180; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
7181; GFX9-NEXT:    s_mov_b32 s33, s32
7182; GFX9-NEXT:    s_addk_i32 s32, 0x400
7183; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
7184; GFX9-NEXT:    s_movk_i32 s4, 0x7b
7185; GFX9-NEXT:    s_mov_b32 s5, 0
7186; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
7187; GFX9-NEXT:    s_getpc_b64 s[34:35]
7188; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
7189; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
7190; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7191; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
7192; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
7193; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
7194; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7195; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7196; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
7197; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7198; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7199; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7200; GFX9-NEXT:    s_waitcnt vmcnt(0)
7201; GFX9-NEXT:    s_setpc_b64 s[30:31]
7202;
7203; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
7204; GFX10:       ; %bb.0:
7205; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7206; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7207; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7208; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7209; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7210; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7211; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
7212; GFX10-NEXT:    s_mov_b32 s33, s32
7213; GFX10-NEXT:    s_addk_i32 s32, 0x200
7214; GFX10-NEXT:    s_getpc_b64 s[34:35]
7215; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
7216; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
7217; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7218; GFX10-NEXT:    s_movk_i32 s4, 0x7b
7219; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
7220; GFX10-NEXT:    s_mov_b32 s5, 0
7221; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
7222; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
7223; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7224; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
7225; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
7226; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
7227; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7228; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7229; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
7230; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7231; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7232; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7233; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7234; GFX10-NEXT:    s_waitcnt vmcnt(0)
7235; GFX10-NEXT:    s_setpc_b64 s[30:31]
7236;
7237; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg:
7238; GFX11:       ; %bb.0:
7239; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7240; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7241; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7242; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7243; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7244; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
7245; GFX11-NEXT:    s_mov_b32 s33, s32
7246; GFX11-NEXT:    s_add_i32 s32, s32, 16
7247; GFX11-NEXT:    s_getpc_b64 s[0:1]
7248; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
7249; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
7250; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7251; GFX11-NEXT:    s_movk_i32 s4, 0x7b
7252; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
7253; GFX11-NEXT:    s_mov_b32 s5, 0
7254; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
7255; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
7256; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7257; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7258; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
7259; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
7260; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
7261; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7262; GFX11-NEXT:    s_add_i32 s32, s32, -16
7263; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
7264; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7265; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7266; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7267; GFX11-NEXT:    s_waitcnt vmcnt(0)
7268; GFX11-NEXT:    s_setpc_b64 s[30:31]
7269;
7270; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
7271; GFX10-SCRATCH:       ; %bb.0:
7272; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7273; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7274; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7275; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7276; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7277; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7278; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
7279; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7280; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7281; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7282; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
7283; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
7284; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7285; GFX10-SCRATCH-NEXT:    s_movk_i32 s4, 0x7b
7286; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
7287; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 0
7288; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
7289; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
7290; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7291; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
7292; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
7293; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
7294; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7295; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7296; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
7297; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7298; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7299; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7300; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7301; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7302; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them with the script instead of editing.
7303  call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
7304  ret void
7305}
7306
; Checks the code generated for an amdgpu_gfx call that passes a <2 x i64>
; value, loaded from a null addrspace(4) pointer, as an inreg argument. For
; every llc configuration the expected output saves s33, s4-s7, s30 and s31
; into lanes of v40, loads the four argument dwords into s[4:7] with a single
; scalar load from a zeroed SGPR pair (s_load_dwordx4, spelled s_load_b128 in
; the gfx1100 output), performs the call, and then reads the saved registers
; back from v40.
7307define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
7308; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
7309; GFX9:       ; %bb.0:
7310; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7311; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7312; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7313; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7314; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
7315; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7316; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
7317; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
7318; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7319; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
7320; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7321; GFX9-NEXT:    s_mov_b32 s33, s32
7322; GFX9-NEXT:    s_addk_i32 s32, 0x400
7323; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
7324; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
7325; GFX9-NEXT:    s_getpc_b64 s[34:35]
7326; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
7327; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
7328; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7329; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
7330; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
7331; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
7332; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
7333; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
7334; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7335; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7336; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
7337; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7338; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7339; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7340; GFX9-NEXT:    s_waitcnt vmcnt(0)
7341; GFX9-NEXT:    s_setpc_b64 s[30:31]
7342;
7343; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
7344; GFX10:       ; %bb.0:
7345; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7346; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7347; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7348; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7349; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7350; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7351; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
7352; GFX10-NEXT:    s_mov_b64 s[34:35], 0
7353; GFX10-NEXT:    s_mov_b32 s33, s32
7354; GFX10-NEXT:    s_addk_i32 s32, 0x200
7355; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7356; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
7357; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
7358; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
7359; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7360; GFX10-NEXT:    s_getpc_b64 s[34:35]
7361; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
7362; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
7363; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
7364; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
7365; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7366; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
7367; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
7368; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
7369; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
7370; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
7371; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7372; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7373; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
7374; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7375; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7376; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7377; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7378; GFX10-NEXT:    s_waitcnt vmcnt(0)
7379; GFX10-NEXT:    s_setpc_b64 s[30:31]
7380;
7381; GFX11-LABEL: test_call_external_void_func_v2i64_inreg:
7382; GFX11:       ; %bb.0:
7383; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7384; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7385; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7386; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7387; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7388; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
7389; GFX11-NEXT:    s_mov_b64 s[0:1], 0
7390; GFX11-NEXT:    s_mov_b32 s33, s32
7391; GFX11-NEXT:    s_add_i32 s32, s32, 16
7392; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7393; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
7394; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
7395; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
7396; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x0
7397; GFX11-NEXT:    s_getpc_b64 s[0:1]
7398; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
7399; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
7400; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
7401; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
7402; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7403; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7404; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
7405; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
7406; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
7407; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
7408; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
7409; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7410; GFX11-NEXT:    s_add_i32 s32, s32, -16
7411; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
7412; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7413; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7414; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7415; GFX11-NEXT:    s_waitcnt vmcnt(0)
7416; GFX11-NEXT:    s_setpc_b64 s[30:31]
7417;
7418; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
7419; GFX10-SCRATCH:       ; %bb.0:
7420; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7421; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7422; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7423; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7424; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7425; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7426; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
7427; GFX10-SCRATCH-NEXT:    s_mov_b64 s[0:1], 0
7428; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7429; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7430; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7431; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
7432; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
7433; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
7434; GFX10-SCRATCH-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
7435; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7436; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
7437; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
7438; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
7439; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
7440; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7441; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
7442; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
7443; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
7444; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
7445; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
7446; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7447; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7448; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
7449; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7450; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7451; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7452; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7453; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7454; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them with the script instead of editing.
7455  %val = load <2 x i64>, <2 x i64> addrspace(4)* null
7456  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
7457  ret void
7458}
7459
; Passes a constant <2 x i64> as an inreg argument to
; @external_void_func_v2i64_inreg (its declaration is outside this view).
; 8589934593 is 0x0000000200000001 and 17179869187 is 0x0000000400000003, so
; the assertions below expect the four dwords 1, 2, 3, 4 to be moved into
; s4, s5, s6 and s7 before the call.
; For every run configuration the assertions also expect s4-s7, s30, s31 and
; s33 to be written to lanes 0-6 of v40 before the call and read back after it.
7460define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
7461; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
7462; GFX9:       ; %bb.0:
7463; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7464; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7465; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7466; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7467; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
7468; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7469; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
7470; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
7471; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
7472; GFX9-NEXT:    s_mov_b32 s33, s32
7473; GFX9-NEXT:    s_addk_i32 s32, 0x400
7474; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
7475; GFX9-NEXT:    s_mov_b32 s4, 1
7476; GFX9-NEXT:    s_mov_b32 s5, 2
7477; GFX9-NEXT:    s_mov_b32 s6, 3
7478; GFX9-NEXT:    s_mov_b32 s7, 4
7479; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
7480; GFX9-NEXT:    s_getpc_b64 s[34:35]
7481; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
7482; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
7483; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7484; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
7485; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
7486; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
7487; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
7488; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
7489; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7490; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7491; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
7492; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7493; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7494; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7495; GFX9-NEXT:    s_waitcnt vmcnt(0)
7496; GFX9-NEXT:    s_setpc_b64 s[30:31]
7497;
7498; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
7499; GFX10:       ; %bb.0:
7500; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7501; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7502; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7503; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7504; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7505; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7506; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
7507; GFX10-NEXT:    s_mov_b32 s33, s32
7508; GFX10-NEXT:    s_addk_i32 s32, 0x200
7509; GFX10-NEXT:    s_getpc_b64 s[34:35]
7510; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
7511; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
7512; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7513; GFX10-NEXT:    s_mov_b32 s4, 1
7514; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
7515; GFX10-NEXT:    s_mov_b32 s5, 2
7516; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
7517; GFX10-NEXT:    s_mov_b32 s6, 3
7518; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
7519; GFX10-NEXT:    s_mov_b32 s7, 4
7520; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
7521; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
7522; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7523; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
7524; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
7525; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
7526; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
7527; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
7528; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7529; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7530; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
7531; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7532; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7533; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7534; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7535; GFX10-NEXT:    s_waitcnt vmcnt(0)
7536; GFX10-NEXT:    s_setpc_b64 s[30:31]
7537;
7538; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg:
7539; GFX11:       ; %bb.0:
7540; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7541; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7542; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7543; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7544; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7545; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
7546; GFX11-NEXT:    s_mov_b32 s33, s32
7547; GFX11-NEXT:    s_add_i32 s32, s32, 16
7548; GFX11-NEXT:    s_getpc_b64 s[0:1]
7549; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
7550; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
7551; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7552; GFX11-NEXT:    s_mov_b32 s4, 1
7553; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
7554; GFX11-NEXT:    s_mov_b32 s5, 2
7555; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
7556; GFX11-NEXT:    s_mov_b32 s6, 3
7557; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
7558; GFX11-NEXT:    s_mov_b32 s7, 4
7559; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
7560; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
7561; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7562; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7563; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
7564; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
7565; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
7566; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
7567; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
7568; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7569; GFX11-NEXT:    s_add_i32 s32, s32, -16
7570; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
7571; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7572; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7573; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7574; GFX11-NEXT:    s_waitcnt vmcnt(0)
7575; GFX11-NEXT:    s_setpc_b64 s[30:31]
7576;
7577; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
7578; GFX10-SCRATCH:       ; %bb.0:
7579; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7580; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7581; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7582; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7583; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7584; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7585; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
7586; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7587; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7588; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7589; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
7590; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
7591; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7592; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1
7593; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
7594; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2
7595; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
7596; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 3
7597; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
7598; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 4
7599; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
7600; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
7601; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7602; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
7603; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
7604; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
7605; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
7606; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
7607; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7608; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7609; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
7610; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7611; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7612; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7613; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7614; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7615; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; The assertion lines above are produced by utils/update_llc_test_checks.py
; (see the file header); regenerate them instead of editing them by hand.
; The test input itself is only the call below.
7616  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
7617  ret void
7618}
7619
; Builds a <3 x i64> inreg argument for @external_void_func_v3i64_inreg (its
; declaration is outside this view). Elements 0 and 1 come from a <2 x i64>
; load through a null addrspace(4) pointer; element 2 is the constant
; 8589934593 (0x0000000200000001), selected by shuffle index 2.
; The assertions below expect the loaded value in s[4:7] and the constant's
; two dwords, 1 and 2, in s8 and s9. They also expect s4-s9, s30, s31 and s33
; to be written to lanes 0-8 of v40 before the call and read back after it.
7620define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
7621; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
7622; GFX9:       ; %bb.0:
7623; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7624; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7625; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7626; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7627; GFX9-NEXT:    v_writelane_b32 v40, s33, 8
7628; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7629; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
7630; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
7631; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7632; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
7633; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7634; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
7635; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
7636; GFX9-NEXT:    s_mov_b32 s33, s32
7637; GFX9-NEXT:    s_addk_i32 s32, 0x400
7638; GFX9-NEXT:    v_writelane_b32 v40, s30, 6
7639; GFX9-NEXT:    s_mov_b32 s8, 1
7640; GFX9-NEXT:    s_mov_b32 s9, 2
7641; GFX9-NEXT:    v_writelane_b32 v40, s31, 7
7642; GFX9-NEXT:    s_getpc_b64 s[34:35]
7643; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
7644; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
7645; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7646; GFX9-NEXT:    v_readlane_b32 s31, v40, 7
7647; GFX9-NEXT:    v_readlane_b32 s30, v40, 6
7648; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
7649; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
7650; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
7651; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
7652; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
7653; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7654; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7655; GFX9-NEXT:    v_readlane_b32 s33, v40, 8
7656; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7657; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7658; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7659; GFX9-NEXT:    s_waitcnt vmcnt(0)
7660; GFX9-NEXT:    s_setpc_b64 s[30:31]
7661;
7662; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
7663; GFX10:       ; %bb.0:
7664; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7665; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7666; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7667; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7668; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7669; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7670; GFX10-NEXT:    v_writelane_b32 v40, s33, 8
7671; GFX10-NEXT:    s_mov_b64 s[34:35], 0
7672; GFX10-NEXT:    s_mov_b32 s33, s32
7673; GFX10-NEXT:    s_addk_i32 s32, 0x200
7674; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7675; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
7676; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
7677; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
7678; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7679; GFX10-NEXT:    s_getpc_b64 s[34:35]
7680; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
7681; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
7682; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
7683; GFX10-NEXT:    s_mov_b32 s8, 1
7684; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
7685; GFX10-NEXT:    s_mov_b32 s9, 2
7686; GFX10-NEXT:    v_writelane_b32 v40, s30, 6
7687; GFX10-NEXT:    v_writelane_b32 v40, s31, 7
7688; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7689; GFX10-NEXT:    v_readlane_b32 s31, v40, 7
7690; GFX10-NEXT:    v_readlane_b32 s30, v40, 6
7691; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
7692; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
7693; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
7694; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
7695; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
7696; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7697; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7698; GFX10-NEXT:    v_readlane_b32 s33, v40, 8
7699; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7700; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7701; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7702; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7703; GFX10-NEXT:    s_waitcnt vmcnt(0)
7704; GFX10-NEXT:    s_setpc_b64 s[30:31]
7705;
7706; GFX11-LABEL: test_call_external_void_func_v3i64_inreg:
7707; GFX11:       ; %bb.0:
7708; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7709; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7710; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7711; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7712; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7713; GFX11-NEXT:    v_writelane_b32 v40, s33, 8
7714; GFX11-NEXT:    s_mov_b64 s[0:1], 0
7715; GFX11-NEXT:    s_mov_b32 s33, s32
7716; GFX11-NEXT:    s_add_i32 s32, s32, 16
7717; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7718; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
7719; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
7720; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
7721; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x0
7722; GFX11-NEXT:    s_getpc_b64 s[0:1]
7723; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
7724; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
7725; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
7726; GFX11-NEXT:    s_mov_b32 s8, 1
7727; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
7728; GFX11-NEXT:    s_mov_b32 s9, 2
7729; GFX11-NEXT:    v_writelane_b32 v40, s30, 6
7730; GFX11-NEXT:    v_writelane_b32 v40, s31, 7
7731; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7732; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7733; GFX11-NEXT:    v_readlane_b32 s31, v40, 7
7734; GFX11-NEXT:    v_readlane_b32 s30, v40, 6
7735; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
7736; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
7737; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
7738; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
7739; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
7740; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7741; GFX11-NEXT:    s_add_i32 s32, s32, -16
7742; GFX11-NEXT:    v_readlane_b32 s33, v40, 8
7743; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7744; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7745; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7746; GFX11-NEXT:    s_waitcnt vmcnt(0)
7747; GFX11-NEXT:    s_setpc_b64 s[30:31]
7748;
7749; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
7750; GFX10-SCRATCH:       ; %bb.0:
7751; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7752; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7753; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7754; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7755; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7756; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7757; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 8
7758; GFX10-SCRATCH-NEXT:    s_mov_b64 s[0:1], 0
7759; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7760; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7761; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7762; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
7763; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
7764; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
7765; GFX10-SCRATCH-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
7766; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7767; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
7768; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
7769; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
7770; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 1
7771; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
7772; GFX10-SCRATCH-NEXT:    s_mov_b32 s9, 2
7773; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 6
7774; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 7
7775; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7776; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 7
7777; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 6
7778; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
7779; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
7780; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
7781; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
7782; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
7783; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7784; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7785; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 8
7786; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7787; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7788; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7789; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7790; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7791; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; The assertion lines above are produced by utils/update_llc_test_checks.py
; (see the file header); regenerate them instead of editing them by hand.
; Shuffle indices 0 and 1 pick both elements of %load; index 2 picks element 0
; of the constant operand, so its undef element 1 is never selected.
7792  %load = load <2 x i64>, <2 x i64> addrspace(4)* null
7793  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
7794
7795  call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
7796  ret void
7797}
7798
; Builds a <4 x i64> inreg argument for @external_void_func_v4i64_inreg (its
; declaration is outside this view). Elements 0 and 1 come from a <2 x i64>
; load through a null addrspace(4) pointer; elements 2 and 3 are the constants
; 8589934593 (0x0000000200000001) and 17179869187 (0x0000000400000003).
; The assertions below expect the loaded value in s[4:7] and the constants'
; dwords 1, 2, 3, 4 in s8, s9, s10 and s11. They also expect s4-s11, s30, s31
; and s33 to be written to lanes 0-10 of v40 before the call and read back
; after it.
7799define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
7800; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
7801; GFX9:       ; %bb.0:
7802; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7803; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7804; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7805; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7806; GFX9-NEXT:    v_writelane_b32 v40, s33, 10
7807; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
7808; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
7809; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
7810; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
7811; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7812; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
7813; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7814; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
7815; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
7816; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
7817; GFX9-NEXT:    s_mov_b32 s33, s32
7818; GFX9-NEXT:    s_addk_i32 s32, 0x400
7819; GFX9-NEXT:    v_writelane_b32 v40, s30, 8
7820; GFX9-NEXT:    s_mov_b32 s8, 1
7821; GFX9-NEXT:    s_mov_b32 s9, 2
7822; GFX9-NEXT:    s_mov_b32 s10, 3
7823; GFX9-NEXT:    s_mov_b32 s11, 4
7824; GFX9-NEXT:    v_writelane_b32 v40, s31, 9
7825; GFX9-NEXT:    s_getpc_b64 s[34:35]
7826; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
7827; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
7828; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7829; GFX9-NEXT:    v_readlane_b32 s31, v40, 9
7830; GFX9-NEXT:    v_readlane_b32 s30, v40, 8
7831; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
7832; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
7833; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
7834; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
7835; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
7836; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
7837; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
7838; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
7839; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
7840; GFX9-NEXT:    v_readlane_b32 s33, v40, 10
7841; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
7842; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7843; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
7844; GFX9-NEXT:    s_waitcnt vmcnt(0)
7845; GFX9-NEXT:    s_setpc_b64 s[30:31]
7846;
7847; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
7848; GFX10:       ; %bb.0:
7849; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7850; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7851; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7852; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
7853; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7854; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7855; GFX10-NEXT:    v_writelane_b32 v40, s33, 10
7856; GFX10-NEXT:    s_mov_b64 s[34:35], 0
7857; GFX10-NEXT:    s_mov_b32 s33, s32
7858; GFX10-NEXT:    s_addk_i32 s32, 0x200
7859; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
7860; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
7861; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
7862; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
7863; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
7864; GFX10-NEXT:    s_getpc_b64 s[34:35]
7865; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
7866; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
7867; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
7868; GFX10-NEXT:    s_mov_b32 s8, 1
7869; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
7870; GFX10-NEXT:    s_mov_b32 s9, 2
7871; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
7872; GFX10-NEXT:    s_mov_b32 s10, 3
7873; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
7874; GFX10-NEXT:    s_mov_b32 s11, 4
7875; GFX10-NEXT:    v_writelane_b32 v40, s30, 8
7876; GFX10-NEXT:    v_writelane_b32 v40, s31, 9
7877; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
7878; GFX10-NEXT:    v_readlane_b32 s31, v40, 9
7879; GFX10-NEXT:    v_readlane_b32 s30, v40, 8
7880; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
7881; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
7882; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
7883; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
7884; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
7885; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
7886; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
7887; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
7888; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
7889; GFX10-NEXT:    v_readlane_b32 s33, v40, 10
7890; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
7891; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
7892; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
7893; GFX10-NEXT:    s_mov_b32 exec_lo, s34
7894; GFX10-NEXT:    s_waitcnt vmcnt(0)
7895; GFX10-NEXT:    s_setpc_b64 s[30:31]
7896;
7897; GFX11-LABEL: test_call_external_void_func_v4i64_inreg:
7898; GFX11:       ; %bb.0:
7899; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7900; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7901; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7902; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
7903; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7904; GFX11-NEXT:    v_writelane_b32 v40, s33, 10
7905; GFX11-NEXT:    s_mov_b64 s[0:1], 0
7906; GFX11-NEXT:    s_mov_b32 s33, s32
7907; GFX11-NEXT:    s_add_i32 s32, s32, 16
7908; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
7909; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
7910; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
7911; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
7912; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x0
7913; GFX11-NEXT:    s_getpc_b64 s[0:1]
7914; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
7915; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
7916; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
7917; GFX11-NEXT:    s_mov_b32 s8, 1
7918; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
7919; GFX11-NEXT:    s_mov_b32 s9, 2
7920; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
7921; GFX11-NEXT:    s_mov_b32 s10, 3
7922; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
7923; GFX11-NEXT:    s_mov_b32 s11, 4
7924; GFX11-NEXT:    v_writelane_b32 v40, s30, 8
7925; GFX11-NEXT:    v_writelane_b32 v40, s31, 9
7926; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7927; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7928; GFX11-NEXT:    v_readlane_b32 s31, v40, 9
7929; GFX11-NEXT:    v_readlane_b32 s30, v40, 8
7930; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
7931; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
7932; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
7933; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
7934; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
7935; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
7936; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
7937; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
7938; GFX11-NEXT:    s_add_i32 s32, s32, -16
7939; GFX11-NEXT:    v_readlane_b32 s33, v40, 10
7940; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
7941; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
7942; GFX11-NEXT:    s_mov_b32 exec_lo, s0
7943; GFX11-NEXT:    s_waitcnt vmcnt(0)
7944; GFX11-NEXT:    s_setpc_b64 s[30:31]
7945;
7946; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
7947; GFX10-SCRATCH:       ; %bb.0:
7948; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7949; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
7950; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7951; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
7952; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7953; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7954; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 10
7955; GFX10-SCRATCH-NEXT:    s_mov_b64 s[0:1], 0
7956; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
7957; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
7958; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
7959; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
7960; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
7961; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
7962; GFX10-SCRATCH-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
7963; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
7964; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
7965; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
7966; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
7967; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 1
7968; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
7969; GFX10-SCRATCH-NEXT:    s_mov_b32 s9, 2
7970; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
7971; GFX10-SCRATCH-NEXT:    s_mov_b32 s10, 3
7972; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
7973; GFX10-SCRATCH-NEXT:    s_mov_b32 s11, 4
7974; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 8
7975; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 9
7976; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7977; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 9
7978; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 8
7979; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
7980; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
7981; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
7982; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
7983; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
7984; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
7985; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
7986; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
7987; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
7988; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 10
7989; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
7990; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
7991; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
7992; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
7993; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
7994; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; The assertion lines above are produced by utils/update_llc_test_checks.py
; (see the file header); regenerate them instead of editing them by hand.
; Shuffle indices 0-3 concatenate %load with the two-element constant operand.
7995  %load = load <2 x i64>, <2 x i64> addrspace(4)* null
7996  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7997  call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
7998  ret void
7999}
8000
; Passes the constant half 4.0 as an inreg argument to
; @external_void_func_f16_inreg (its declaration is outside this view).
; The assertions below expect the value to be materialized in s4 with
; s_movk_i32 as 0x4400, the IEEE half-precision bit pattern of 4.0.
; They also expect s4, s30, s31 and s33 to be written to lanes 0-3 of v40
; before the call and read back after it.
8001define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
8002; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
8003; GFX9:       ; %bb.0:
8004; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8005; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8006; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8007; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8008; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
8009; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8010; GFX9-NEXT:    s_mov_b32 s33, s32
8011; GFX9-NEXT:    s_addk_i32 s32, 0x400
8012; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
8013; GFX9-NEXT:    s_movk_i32 s4, 0x4400
8014; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
8015; GFX9-NEXT:    s_getpc_b64 s[34:35]
8016; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
8017; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
8018; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8019; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
8020; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
8021; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8022; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8023; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
8024; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8025; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8026; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8027; GFX9-NEXT:    s_waitcnt vmcnt(0)
8028; GFX9-NEXT:    s_setpc_b64 s[30:31]
8029;
8030; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
8031; GFX10:       ; %bb.0:
8032; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8033; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8034; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8035; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8036; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8037; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8038; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
8039; GFX10-NEXT:    s_mov_b32 s33, s32
8040; GFX10-NEXT:    s_addk_i32 s32, 0x200
8041; GFX10-NEXT:    s_getpc_b64 s[34:35]
8042; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
8043; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
8044; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8045; GFX10-NEXT:    s_movk_i32 s4, 0x4400
8046; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
8047; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
8048; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8049; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
8050; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
8051; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8052; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8053; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
8054; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8055; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8056; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8057; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8058; GFX10-NEXT:    s_waitcnt vmcnt(0)
8059; GFX10-NEXT:    s_setpc_b64 s[30:31]
8060;
8061; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg:
8062; GFX11:       ; %bb.0:
8063; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8064; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8065; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8066; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8067; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8068; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
8069; GFX11-NEXT:    s_mov_b32 s33, s32
8070; GFX11-NEXT:    s_add_i32 s32, s32, 16
8071; GFX11-NEXT:    s_getpc_b64 s[0:1]
8072; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
8073; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
8074; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8075; GFX11-NEXT:    s_movk_i32 s4, 0x4400
8076; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
8077; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
8078; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8079; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8080; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
8081; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
8082; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8083; GFX11-NEXT:    s_add_i32 s32, s32, -16
8084; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
8085; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8086; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8087; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8088; GFX11-NEXT:    s_waitcnt vmcnt(0)
8089; GFX11-NEXT:    s_setpc_b64 s[30:31]
8090;
8091; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
8092; GFX10-SCRATCH:       ; %bb.0:
8093; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8094; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8095; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8096; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8097; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8098; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8099; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
8100; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8101; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8102; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8103; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
8104; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
8105; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8106; GFX10-SCRATCH-NEXT:    s_movk_i32 s4, 0x4400
8107; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
8108; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
8109; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8110; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
8111; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
8112; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8113; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8114; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
8115; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8116; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8117; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8118; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8119; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8120; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; The assertion lines above are produced by utils/update_llc_test_checks.py
; (see the file header); regenerate them instead of editing them by hand.
; The test input itself is only the call below.
8121  call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
8122  ret void
8123}
8124
; Calls external_void_func_f32_inreg with the constant 4.0 passed as an inreg
; float argument. In the expected code for every RUN configuration the incoming
; s4 is saved to lane 0 of v40, s4 is then loaded with the inline constant 4.0
; by s_mov_b32 ahead of the s_swappc_b64 call, and s4, s30, s31 and s33 are
; read back from v40 once the call returns.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
8125define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
8126; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
8127; GFX9:       ; %bb.0:
8128; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8129; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8130; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8131; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8132; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
8133; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8134; GFX9-NEXT:    s_mov_b32 s33, s32
8135; GFX9-NEXT:    s_addk_i32 s32, 0x400
8136; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
8137; GFX9-NEXT:    s_mov_b32 s4, 4.0
8138; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
8139; GFX9-NEXT:    s_getpc_b64 s[34:35]
8140; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
8141; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
8142; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8143; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
8144; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
8145; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8146; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8147; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
8148; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8149; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8150; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8151; GFX9-NEXT:    s_waitcnt vmcnt(0)
8152; GFX9-NEXT:    s_setpc_b64 s[30:31]
8153;
8154; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
8155; GFX10:       ; %bb.0:
8156; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8157; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8158; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8159; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8160; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8161; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8162; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
8163; GFX10-NEXT:    s_mov_b32 s33, s32
8164; GFX10-NEXT:    s_addk_i32 s32, 0x200
8165; GFX10-NEXT:    s_getpc_b64 s[34:35]
8166; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
8167; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
8168; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8169; GFX10-NEXT:    s_mov_b32 s4, 4.0
8170; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
8171; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
8172; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8173; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
8174; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
8175; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8176; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8177; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
8178; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8179; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8180; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8181; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8182; GFX10-NEXT:    s_waitcnt vmcnt(0)
8183; GFX10-NEXT:    s_setpc_b64 s[30:31]
8184;
8185; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg:
8186; GFX11:       ; %bb.0:
8187; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8188; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8189; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8190; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8191; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8192; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
8193; GFX11-NEXT:    s_mov_b32 s33, s32
8194; GFX11-NEXT:    s_add_i32 s32, s32, 16
8195; GFX11-NEXT:    s_getpc_b64 s[0:1]
8196; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
8197; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
8198; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8199; GFX11-NEXT:    s_mov_b32 s4, 4.0
8200; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
8201; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
8202; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8203; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8204; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
8205; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
8206; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8207; GFX11-NEXT:    s_add_i32 s32, s32, -16
8208; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
8209; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8210; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8211; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8212; GFX11-NEXT:    s_waitcnt vmcnt(0)
8213; GFX11-NEXT:    s_setpc_b64 s[30:31]
8214;
8215; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
8216; GFX10-SCRATCH:       ; %bb.0:
8217; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8218; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8219; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8220; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8221; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8222; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8223; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
8224; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8225; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8226; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8227; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
8228; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
8229; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8230; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 4.0
8231; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
8232; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
8233; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8234; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
8235; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
8236; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8237; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8238; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
8239; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8240; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8241; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8242; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8243; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8244; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8245  call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
8246  ret void
8247}
8248
; Calls external_void_func_v2f32_inreg with the constant vector <1.0, 2.0>
; passed as an inreg <2 x float> argument. In the expected code for every RUN
; configuration the incoming s4 and s5 are saved to lanes 0 and 1 of v40, the
; two elements are then loaded into s4 (1.0) and s5 (2.0) by s_mov_b32 ahead of
; the s_swappc_b64 call, and s4, s5, s30, s31 and s33 are read back from v40
; once the call returns.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
8249define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
8250; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
8251; GFX9:       ; %bb.0:
8252; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8253; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8254; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8255; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8256; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
8257; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8258; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
8259; GFX9-NEXT:    s_mov_b32 s33, s32
8260; GFX9-NEXT:    s_addk_i32 s32, 0x400
8261; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
8262; GFX9-NEXT:    s_mov_b32 s4, 1.0
8263; GFX9-NEXT:    s_mov_b32 s5, 2.0
8264; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
8265; GFX9-NEXT:    s_getpc_b64 s[34:35]
8266; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
8267; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
8268; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8269; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
8270; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
8271; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
8272; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8273; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8274; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
8275; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8276; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8277; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8278; GFX9-NEXT:    s_waitcnt vmcnt(0)
8279; GFX9-NEXT:    s_setpc_b64 s[30:31]
8280;
8281; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
8282; GFX10:       ; %bb.0:
8283; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8284; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8285; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8286; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8287; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8288; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8289; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
8290; GFX10-NEXT:    s_mov_b32 s33, s32
8291; GFX10-NEXT:    s_addk_i32 s32, 0x200
8292; GFX10-NEXT:    s_getpc_b64 s[34:35]
8293; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
8294; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
8295; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8296; GFX10-NEXT:    s_mov_b32 s4, 1.0
8297; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
8298; GFX10-NEXT:    s_mov_b32 s5, 2.0
8299; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
8300; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
8301; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8302; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
8303; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
8304; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
8305; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8306; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8307; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
8308; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8309; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8310; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8311; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8312; GFX10-NEXT:    s_waitcnt vmcnt(0)
8313; GFX10-NEXT:    s_setpc_b64 s[30:31]
8314;
8315; GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg:
8316; GFX11:       ; %bb.0:
8317; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8318; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8319; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8320; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8321; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8322; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
8323; GFX11-NEXT:    s_mov_b32 s33, s32
8324; GFX11-NEXT:    s_add_i32 s32, s32, 16
8325; GFX11-NEXT:    s_getpc_b64 s[0:1]
8326; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
8327; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
8328; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8329; GFX11-NEXT:    s_mov_b32 s4, 1.0
8330; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
8331; GFX11-NEXT:    s_mov_b32 s5, 2.0
8332; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
8333; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
8334; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8335; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8336; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
8337; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
8338; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
8339; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8340; GFX11-NEXT:    s_add_i32 s32, s32, -16
8341; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
8342; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8343; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8344; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8345; GFX11-NEXT:    s_waitcnt vmcnt(0)
8346; GFX11-NEXT:    s_setpc_b64 s[30:31]
8347;
8348; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
8349; GFX10-SCRATCH:       ; %bb.0:
8350; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8351; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8352; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8353; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8354; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8355; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8356; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
8357; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8358; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8359; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8360; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
8361; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
8362; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8363; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1.0
8364; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
8365; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2.0
8366; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
8367; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
8368; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8369; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
8370; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
8371; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
8372; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8373; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8374; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
8375; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8376; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8377; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8378; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8379; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8380; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8381  call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
8382  ret void
8383}
8384
; Calls external_void_func_v3f32_inreg with the constant vector
; <1.0, 2.0, 4.0> passed as an inreg <3 x float> argument. In the expected code
; for every RUN configuration the incoming s4, s5 and s6 are saved to lanes
; 0..2 of v40, the three elements are then loaded into s4 (1.0), s5 (2.0) and
; s6 (4.0) by s_mov_b32 ahead of the s_swappc_b64 call, and s4..s6, s30, s31
; and s33 are read back from v40 once the call returns.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
8385define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
8386; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
8387; GFX9:       ; %bb.0:
8388; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8389; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8390; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8391; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8392; GFX9-NEXT:    v_writelane_b32 v40, s33, 5
8393; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8394; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
8395; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
8396; GFX9-NEXT:    s_mov_b32 s33, s32
8397; GFX9-NEXT:    s_addk_i32 s32, 0x400
8398; GFX9-NEXT:    v_writelane_b32 v40, s30, 3
8399; GFX9-NEXT:    s_mov_b32 s4, 1.0
8400; GFX9-NEXT:    s_mov_b32 s5, 2.0
8401; GFX9-NEXT:    s_mov_b32 s6, 4.0
8402; GFX9-NEXT:    v_writelane_b32 v40, s31, 4
8403; GFX9-NEXT:    s_getpc_b64 s[34:35]
8404; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
8405; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
8406; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8407; GFX9-NEXT:    v_readlane_b32 s31, v40, 4
8408; GFX9-NEXT:    v_readlane_b32 s30, v40, 3
8409; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
8410; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
8411; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8412; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8413; GFX9-NEXT:    v_readlane_b32 s33, v40, 5
8414; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8415; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8416; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8417; GFX9-NEXT:    s_waitcnt vmcnt(0)
8418; GFX9-NEXT:    s_setpc_b64 s[30:31]
8419;
8420; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
8421; GFX10:       ; %bb.0:
8422; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8423; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8424; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8425; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8426; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8427; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8428; GFX10-NEXT:    v_writelane_b32 v40, s33, 5
8429; GFX10-NEXT:    s_mov_b32 s33, s32
8430; GFX10-NEXT:    s_addk_i32 s32, 0x200
8431; GFX10-NEXT:    s_getpc_b64 s[34:35]
8432; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
8433; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
8434; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8435; GFX10-NEXT:    s_mov_b32 s4, 1.0
8436; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
8437; GFX10-NEXT:    s_mov_b32 s5, 2.0
8438; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
8439; GFX10-NEXT:    s_mov_b32 s6, 4.0
8440; GFX10-NEXT:    v_writelane_b32 v40, s30, 3
8441; GFX10-NEXT:    v_writelane_b32 v40, s31, 4
8442; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8443; GFX10-NEXT:    v_readlane_b32 s31, v40, 4
8444; GFX10-NEXT:    v_readlane_b32 s30, v40, 3
8445; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
8446; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
8447; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8448; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8449; GFX10-NEXT:    v_readlane_b32 s33, v40, 5
8450; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8451; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8452; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8453; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8454; GFX10-NEXT:    s_waitcnt vmcnt(0)
8455; GFX10-NEXT:    s_setpc_b64 s[30:31]
8456;
8457; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg:
8458; GFX11:       ; %bb.0:
8459; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8460; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8461; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8462; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8463; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8464; GFX11-NEXT:    v_writelane_b32 v40, s33, 5
8465; GFX11-NEXT:    s_mov_b32 s33, s32
8466; GFX11-NEXT:    s_add_i32 s32, s32, 16
8467; GFX11-NEXT:    s_getpc_b64 s[0:1]
8468; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
8469; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
8470; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8471; GFX11-NEXT:    s_mov_b32 s4, 1.0
8472; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
8473; GFX11-NEXT:    s_mov_b32 s5, 2.0
8474; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
8475; GFX11-NEXT:    s_mov_b32 s6, 4.0
8476; GFX11-NEXT:    v_writelane_b32 v40, s30, 3
8477; GFX11-NEXT:    v_writelane_b32 v40, s31, 4
8478; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8479; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8480; GFX11-NEXT:    v_readlane_b32 s31, v40, 4
8481; GFX11-NEXT:    v_readlane_b32 s30, v40, 3
8482; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
8483; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
8484; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8485; GFX11-NEXT:    s_add_i32 s32, s32, -16
8486; GFX11-NEXT:    v_readlane_b32 s33, v40, 5
8487; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8488; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8489; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8490; GFX11-NEXT:    s_waitcnt vmcnt(0)
8491; GFX11-NEXT:    s_setpc_b64 s[30:31]
8492;
8493; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
8494; GFX10-SCRATCH:       ; %bb.0:
8495; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8496; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8497; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8498; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8499; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8500; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8501; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 5
8502; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8503; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8504; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8505; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
8506; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
8507; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8508; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1.0
8509; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
8510; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2.0
8511; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
8512; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 4.0
8513; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 3
8514; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 4
8515; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8516; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 4
8517; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 3
8518; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
8519; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
8520; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8521; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8522; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 5
8523; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8524; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8525; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8526; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8527; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8528; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8529  call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
8530  ret void
8531}
8532
; Calls external_void_func_v5f32_inreg with the constant vector
; <1.0, 2.0, 4.0, -1.0, 0.5> passed as an inreg <5 x float> argument. In the
; expected code for every RUN configuration the incoming s4..s8 are saved to
; lanes 0..4 of v40, the five elements are then loaded into s4..s8 in element
; order by s_mov_b32 ahead of the s_swappc_b64 call, and s4..s8, s30, s31 and
; s33 are read back from v40 once the call returns.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
8533define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
8534; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
8535; GFX9:       ; %bb.0:
8536; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8537; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8538; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8539; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8540; GFX9-NEXT:    v_writelane_b32 v40, s33, 7
8541; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8542; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
8543; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
8544; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
8545; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
8546; GFX9-NEXT:    s_mov_b32 s33, s32
8547; GFX9-NEXT:    s_addk_i32 s32, 0x400
8548; GFX9-NEXT:    v_writelane_b32 v40, s30, 5
8549; GFX9-NEXT:    s_mov_b32 s4, 1.0
8550; GFX9-NEXT:    s_mov_b32 s5, 2.0
8551; GFX9-NEXT:    s_mov_b32 s6, 4.0
8552; GFX9-NEXT:    s_mov_b32 s7, -1.0
8553; GFX9-NEXT:    s_mov_b32 s8, 0.5
8554; GFX9-NEXT:    v_writelane_b32 v40, s31, 6
8555; GFX9-NEXT:    s_getpc_b64 s[34:35]
8556; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
8557; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
8558; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8559; GFX9-NEXT:    v_readlane_b32 s31, v40, 6
8560; GFX9-NEXT:    v_readlane_b32 s30, v40, 5
8561; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
8562; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
8563; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
8564; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
8565; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8566; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8567; GFX9-NEXT:    v_readlane_b32 s33, v40, 7
8568; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8569; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8570; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8571; GFX9-NEXT:    s_waitcnt vmcnt(0)
8572; GFX9-NEXT:    s_setpc_b64 s[30:31]
8573;
8574; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
8575; GFX10:       ; %bb.0:
8576; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8577; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8578; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8579; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8580; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8581; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8582; GFX10-NEXT:    v_writelane_b32 v40, s33, 7
8583; GFX10-NEXT:    s_mov_b32 s33, s32
8584; GFX10-NEXT:    s_addk_i32 s32, 0x200
8585; GFX10-NEXT:    s_getpc_b64 s[34:35]
8586; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
8587; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
8588; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8589; GFX10-NEXT:    s_mov_b32 s4, 1.0
8590; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
8591; GFX10-NEXT:    s_mov_b32 s5, 2.0
8592; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
8593; GFX10-NEXT:    s_mov_b32 s6, 4.0
8594; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
8595; GFX10-NEXT:    s_mov_b32 s7, -1.0
8596; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
8597; GFX10-NEXT:    s_mov_b32 s8, 0.5
8598; GFX10-NEXT:    v_writelane_b32 v40, s30, 5
8599; GFX10-NEXT:    v_writelane_b32 v40, s31, 6
8600; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8601; GFX10-NEXT:    v_readlane_b32 s31, v40, 6
8602; GFX10-NEXT:    v_readlane_b32 s30, v40, 5
8603; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
8604; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
8605; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
8606; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
8607; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8608; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8609; GFX10-NEXT:    v_readlane_b32 s33, v40, 7
8610; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8611; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8612; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8613; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8614; GFX10-NEXT:    s_waitcnt vmcnt(0)
8615; GFX10-NEXT:    s_setpc_b64 s[30:31]
8616;
8617; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg:
8618; GFX11:       ; %bb.0:
8619; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8620; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8621; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8622; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8623; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8624; GFX11-NEXT:    v_writelane_b32 v40, s33, 7
8625; GFX11-NEXT:    s_mov_b32 s33, s32
8626; GFX11-NEXT:    s_add_i32 s32, s32, 16
8627; GFX11-NEXT:    s_getpc_b64 s[0:1]
8628; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
8629; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
8630; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8631; GFX11-NEXT:    s_mov_b32 s4, 1.0
8632; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
8633; GFX11-NEXT:    s_mov_b32 s5, 2.0
8634; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
8635; GFX11-NEXT:    s_mov_b32 s6, 4.0
8636; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
8637; GFX11-NEXT:    s_mov_b32 s7, -1.0
8638; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
8639; GFX11-NEXT:    s_mov_b32 s8, 0.5
8640; GFX11-NEXT:    v_writelane_b32 v40, s30, 5
8641; GFX11-NEXT:    v_writelane_b32 v40, s31, 6
8642; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8643; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8644; GFX11-NEXT:    v_readlane_b32 s31, v40, 6
8645; GFX11-NEXT:    v_readlane_b32 s30, v40, 5
8646; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
8647; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
8648; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
8649; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
8650; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8651; GFX11-NEXT:    s_add_i32 s32, s32, -16
8652; GFX11-NEXT:    v_readlane_b32 s33, v40, 7
8653; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8654; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8655; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8656; GFX11-NEXT:    s_waitcnt vmcnt(0)
8657; GFX11-NEXT:    s_setpc_b64 s[30:31]
8658;
8659; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
8660; GFX10-SCRATCH:       ; %bb.0:
8661; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8662; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8663; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8664; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8665; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8666; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8667; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 7
8668; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8669; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8670; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8671; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
8672; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
8673; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8674; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1.0
8675; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
8676; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2.0
8677; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
8678; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 4.0
8679; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
8680; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, -1.0
8681; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
8682; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 0.5
8683; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 5
8684; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 6
8685; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8686; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 6
8687; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 5
8688; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
8689; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
8690; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
8691; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
8692; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8693; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8694; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 7
8695; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8696; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8697; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8698; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8699; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8700; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8701  call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
8702  ret void
8703}
8704
; Calls @external_void_func_f64_inreg with the constant double 4.0 passed as an
; inreg argument. The expected code for every llc configuration puts the value
; in s4/s5 (s4 = 0, s5 = 0x40100000) and keeps the previous s4, s5, s30, s31
; and s33 in lanes 0-4 of v40 across the call, reading them back afterwards.
8705define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
8706; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
8707; GFX9:       ; %bb.0:
8708; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8709; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8710; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8711; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8712; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
8713; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8714; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
8715; GFX9-NEXT:    s_mov_b32 s33, s32
8716; GFX9-NEXT:    s_addk_i32 s32, 0x400
8717; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
8718; GFX9-NEXT:    s_mov_b32 s4, 0
8719; GFX9-NEXT:    s_mov_b32 s5, 0x40100000
8720; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
8721; GFX9-NEXT:    s_getpc_b64 s[34:35]
8722; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
8723; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
8724; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8725; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
8726; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
8727; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
8728; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8729; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8730; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
8731; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8732; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8733; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8734; GFX9-NEXT:    s_waitcnt vmcnt(0)
8735; GFX9-NEXT:    s_setpc_b64 s[30:31]
8736;
8737; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
8738; GFX10:       ; %bb.0:
8739; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8740; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8741; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8742; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8743; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8744; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8745; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
8746; GFX10-NEXT:    s_mov_b32 s33, s32
8747; GFX10-NEXT:    s_addk_i32 s32, 0x200
8748; GFX10-NEXT:    s_getpc_b64 s[34:35]
8749; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
8750; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
8751; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8752; GFX10-NEXT:    s_mov_b32 s4, 0
8753; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
8754; GFX10-NEXT:    s_mov_b32 s5, 0x40100000
8755; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
8756; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
8757; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8758; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
8759; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
8760; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
8761; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8762; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8763; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
8764; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8765; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8766; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8767; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8768; GFX10-NEXT:    s_waitcnt vmcnt(0)
8769; GFX10-NEXT:    s_setpc_b64 s[30:31]
8770;
8771; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg:
8772; GFX11:       ; %bb.0:
8773; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8774; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8775; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8776; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8777; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8778; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
8779; GFX11-NEXT:    s_mov_b32 s33, s32
8780; GFX11-NEXT:    s_add_i32 s32, s32, 16
8781; GFX11-NEXT:    s_getpc_b64 s[0:1]
8782; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
8783; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
8784; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8785; GFX11-NEXT:    s_mov_b32 s4, 0
8786; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
8787; GFX11-NEXT:    s_mov_b32 s5, 0x40100000
8788; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
8789; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
8790; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8791; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8792; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
8793; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
8794; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
8795; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8796; GFX11-NEXT:    s_add_i32 s32, s32, -16
8797; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
8798; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8799; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8800; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8801; GFX11-NEXT:    s_waitcnt vmcnt(0)
8802; GFX11-NEXT:    s_setpc_b64 s[30:31]
8803;
8804; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
8805; GFX10-SCRATCH:       ; %bb.0:
8806; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8807; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8808; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8809; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8810; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8811; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8812; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
8813; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8814; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8815; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8816; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
8817; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
8818; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8819; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0
8820; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
8821; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 0x40100000
8822; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
8823; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
8824; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8825; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
8826; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
8827; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
8828; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8829; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8830; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
8831; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8832; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8833; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8834; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8835; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8836; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8837  call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
8838  ret void
8839}
8840
; Calls @external_void_func_v2f64_inreg with the constant vector
; <double 2.0, double 4.0> passed as an inreg argument. The expected code for
; every llc configuration puts the value in s4-s7 (s4 = 0, s5 = 2.0, s6 = 0,
; s7 = 0x40100000) and keeps the previous s4-s7, s30, s31 and s33 in lanes 0-6
; of v40 across the call, reading them back afterwards.
8841define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
8842; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
8843; GFX9:       ; %bb.0:
8844; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8845; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8846; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8847; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8848; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
8849; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
8850; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
8851; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
8852; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
8853; GFX9-NEXT:    s_mov_b32 s33, s32
8854; GFX9-NEXT:    s_addk_i32 s32, 0x400
8855; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
8856; GFX9-NEXT:    s_mov_b32 s4, 0
8857; GFX9-NEXT:    s_mov_b32 s5, 2.0
8858; GFX9-NEXT:    s_mov_b32 s6, 0
8859; GFX9-NEXT:    s_mov_b32 s7, 0x40100000
8860; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
8861; GFX9-NEXT:    s_getpc_b64 s[34:35]
8862; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
8863; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
8864; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8865; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
8866; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
8867; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
8868; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
8869; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
8870; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
8871; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
8872; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
8873; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
8874; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8875; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
8876; GFX9-NEXT:    s_waitcnt vmcnt(0)
8877; GFX9-NEXT:    s_setpc_b64 s[30:31]
8878;
8879; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
8880; GFX10:       ; %bb.0:
8881; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8882; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
8883; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8884; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
8885; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8886; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8887; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
8888; GFX10-NEXT:    s_mov_b32 s33, s32
8889; GFX10-NEXT:    s_addk_i32 s32, 0x200
8890; GFX10-NEXT:    s_getpc_b64 s[34:35]
8891; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
8892; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
8893; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
8894; GFX10-NEXT:    s_mov_b32 s4, 0
8895; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
8896; GFX10-NEXT:    s_mov_b32 s5, 2.0
8897; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
8898; GFX10-NEXT:    s_mov_b32 s6, 0
8899; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
8900; GFX10-NEXT:    s_mov_b32 s7, 0x40100000
8901; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
8902; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
8903; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
8904; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
8905; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
8906; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
8907; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
8908; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
8909; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
8910; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
8911; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
8912; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
8913; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
8914; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
8915; GFX10-NEXT:    s_mov_b32 exec_lo, s34
8916; GFX10-NEXT:    s_waitcnt vmcnt(0)
8917; GFX10-NEXT:    s_setpc_b64 s[30:31]
8918;
8919; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg:
8920; GFX11:       ; %bb.0:
8921; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8922; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8923; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8924; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
8925; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8926; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
8927; GFX11-NEXT:    s_mov_b32 s33, s32
8928; GFX11-NEXT:    s_add_i32 s32, s32, 16
8929; GFX11-NEXT:    s_getpc_b64 s[0:1]
8930; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
8931; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
8932; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
8933; GFX11-NEXT:    s_mov_b32 s4, 0
8934; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
8935; GFX11-NEXT:    s_mov_b32 s5, 2.0
8936; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
8937; GFX11-NEXT:    s_mov_b32 s6, 0
8938; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
8939; GFX11-NEXT:    s_mov_b32 s7, 0x40100000
8940; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
8941; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
8942; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8943; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8944; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
8945; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
8946; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
8947; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
8948; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
8949; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
8950; GFX11-NEXT:    s_add_i32 s32, s32, -16
8951; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
8952; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
8953; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
8954; GFX11-NEXT:    s_mov_b32 exec_lo, s0
8955; GFX11-NEXT:    s_waitcnt vmcnt(0)
8956; GFX11-NEXT:    s_setpc_b64 s[30:31]
8957;
8958; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
8959; GFX10-SCRATCH:       ; %bb.0:
8960; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8961; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
8962; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8963; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
8964; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8965; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8966; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
8967; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
8968; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
8969; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
8970; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
8971; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
8972; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
8973; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0
8974; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
8975; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2.0
8976; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
8977; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 0
8978; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
8979; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 0x40100000
8980; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
8981; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
8982; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
8983; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
8984; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
8985; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
8986; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
8987; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
8988; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
8989; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
8990; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
8991; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
8992; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
8993; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
8994; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
8995; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
8996; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
8997  call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
8998  ret void
8999}
9000
; Calls @external_void_func_v3f64_inreg with the constant vector
; <double 2.0, double 4.0, double 8.0> passed as an inreg argument. The
; expected code for every llc configuration puts the value in s4-s9 (s4 = 0,
; s5 = 2.0, s6 = 0, s7 = 0x40100000, s8 = 0, s9 = 0x40200000) and keeps the
; previous s4-s9, s30, s31 and s33 in lanes 0-8 of v40 across the call,
; reading them back afterwards.
9001define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
9002; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
9003; GFX9:       ; %bb.0:
9004; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9005; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9006; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9007; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9008; GFX9-NEXT:    v_writelane_b32 v40, s33, 8
9009; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9010; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9011; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
9012; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
9013; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
9014; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
9015; GFX9-NEXT:    s_mov_b32 s33, s32
9016; GFX9-NEXT:    s_addk_i32 s32, 0x400
9017; GFX9-NEXT:    v_writelane_b32 v40, s30, 6
9018; GFX9-NEXT:    s_mov_b32 s4, 0
9019; GFX9-NEXT:    s_mov_b32 s5, 2.0
9020; GFX9-NEXT:    s_mov_b32 s6, 0
9021; GFX9-NEXT:    s_mov_b32 s7, 0x40100000
9022; GFX9-NEXT:    s_mov_b32 s8, 0
9023; GFX9-NEXT:    s_mov_b32 s9, 0x40200000
9024; GFX9-NEXT:    v_writelane_b32 v40, s31, 7
9025; GFX9-NEXT:    s_getpc_b64 s[34:35]
9026; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
9027; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
9028; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9029; GFX9-NEXT:    v_readlane_b32 s31, v40, 7
9030; GFX9-NEXT:    v_readlane_b32 s30, v40, 6
9031; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
9032; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
9033; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
9034; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
9035; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9036; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9037; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9038; GFX9-NEXT:    v_readlane_b32 s33, v40, 8
9039; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9040; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9041; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9042; GFX9-NEXT:    s_waitcnt vmcnt(0)
9043; GFX9-NEXT:    s_setpc_b64 s[30:31]
9044;
9045; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
9046; GFX10:       ; %bb.0:
9047; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9048; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9049; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9050; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9051; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9052; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9053; GFX10-NEXT:    v_writelane_b32 v40, s33, 8
9054; GFX10-NEXT:    s_mov_b32 s33, s32
9055; GFX10-NEXT:    s_addk_i32 s32, 0x200
9056; GFX10-NEXT:    s_getpc_b64 s[34:35]
9057; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
9058; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
9059; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9060; GFX10-NEXT:    s_mov_b32 s4, 0
9061; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9062; GFX10-NEXT:    s_mov_b32 s5, 2.0
9063; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
9064; GFX10-NEXT:    s_mov_b32 s6, 0
9065; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
9066; GFX10-NEXT:    s_mov_b32 s7, 0x40100000
9067; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
9068; GFX10-NEXT:    s_mov_b32 s8, 0
9069; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
9070; GFX10-NEXT:    s_mov_b32 s9, 0x40200000
9071; GFX10-NEXT:    v_writelane_b32 v40, s30, 6
9072; GFX10-NEXT:    v_writelane_b32 v40, s31, 7
9073; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9074; GFX10-NEXT:    v_readlane_b32 s31, v40, 7
9075; GFX10-NEXT:    v_readlane_b32 s30, v40, 6
9076; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
9077; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
9078; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
9079; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
9080; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9081; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9082; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9083; GFX10-NEXT:    v_readlane_b32 s33, v40, 8
9084; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9085; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9086; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9087; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9088; GFX10-NEXT:    s_waitcnt vmcnt(0)
9089; GFX10-NEXT:    s_setpc_b64 s[30:31]
9090;
9091; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg:
9092; GFX11:       ; %bb.0:
9093; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9094; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9095; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9096; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9097; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9098; GFX11-NEXT:    v_writelane_b32 v40, s33, 8
9099; GFX11-NEXT:    s_mov_b32 s33, s32
9100; GFX11-NEXT:    s_add_i32 s32, s32, 16
9101; GFX11-NEXT:    s_getpc_b64 s[0:1]
9102; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
9103; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
9104; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9105; GFX11-NEXT:    s_mov_b32 s4, 0
9106; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9107; GFX11-NEXT:    s_mov_b32 s5, 2.0
9108; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
9109; GFX11-NEXT:    s_mov_b32 s6, 0
9110; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
9111; GFX11-NEXT:    s_mov_b32 s7, 0x40100000
9112; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
9113; GFX11-NEXT:    s_mov_b32 s8, 0
9114; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
9115; GFX11-NEXT:    s_mov_b32 s9, 0x40200000
9116; GFX11-NEXT:    v_writelane_b32 v40, s30, 6
9117; GFX11-NEXT:    v_writelane_b32 v40, s31, 7
9118; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9119; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9120; GFX11-NEXT:    v_readlane_b32 s31, v40, 7
9121; GFX11-NEXT:    v_readlane_b32 s30, v40, 6
9122; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
9123; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
9124; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
9125; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
9126; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9127; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9128; GFX11-NEXT:    s_add_i32 s32, s32, -16
9129; GFX11-NEXT:    v_readlane_b32 s33, v40, 8
9130; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9131; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9132; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9133; GFX11-NEXT:    s_waitcnt vmcnt(0)
9134; GFX11-NEXT:    s_setpc_b64 s[30:31]
9135;
9136; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
9137; GFX10-SCRATCH:       ; %bb.0:
9138; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9139; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9140; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9141; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9142; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9143; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9144; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 8
9145; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9146; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9147; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9148; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
9149; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
9150; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9151; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0
9152; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9153; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2.0
9154; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
9155; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 0
9156; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
9157; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 0x40100000
9158; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
9159; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 0
9160; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
9161; GFX10-SCRATCH-NEXT:    s_mov_b32 s9, 0x40200000
9162; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 6
9163; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 7
9164; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9165; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 7
9166; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 6
9167; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
9168; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
9169; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
9170; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
9171; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9172; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9173; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9174; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 8
9175; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9176; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9177; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9178; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9179; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9180; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9181  call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
9182  ret void
9183}
9184
; Loads a <2 x i16> from an undef addrspace(4) pointer and passes it to
; @external_void_func_v2i16_inreg as an inreg argument. The expected code for
; every llc configuration loads the value with a single scalar dword load into
; s4 (s_load_dword, or s_load_b32 in the gfx1100 checks) and keeps the previous
; s4, s30, s31 and s33 in lanes 0-3 of v40 across the call, reading them back
; afterwards.
9185define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
9186; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
9187; GFX9:       ; %bb.0:
9188; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9189; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9190; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9191; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9192; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
9193; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9194; GFX9-NEXT:    s_load_dword s4, s[34:35], 0x0
9195; GFX9-NEXT:    s_mov_b32 s33, s32
9196; GFX9-NEXT:    s_addk_i32 s32, 0x400
9197; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
9198; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
9199; GFX9-NEXT:    s_getpc_b64 s[34:35]
9200; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
9201; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
9202; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9203; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
9204; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
9205; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9206; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9207; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
9208; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9209; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9210; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9211; GFX9-NEXT:    s_waitcnt vmcnt(0)
9212; GFX9-NEXT:    s_setpc_b64 s[30:31]
9213;
9214; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
9215; GFX10:       ; %bb.0:
9216; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9217; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9218; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9219; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9220; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9221; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9222; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
9223; GFX10-NEXT:    s_mov_b32 s33, s32
9224; GFX10-NEXT:    s_addk_i32 s32, 0x200
9225; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9226; GFX10-NEXT:    s_load_dword s4, s[34:35], 0x0
9227; GFX10-NEXT:    s_getpc_b64 s[34:35]
9228; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
9229; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
9230; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
9231; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
9232; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9233; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
9234; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
9235; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9236; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9237; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
9238; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9239; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9240; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9241; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9242; GFX10-NEXT:    s_waitcnt vmcnt(0)
9243; GFX10-NEXT:    s_setpc_b64 s[30:31]
9244;
9245; GFX11-LABEL: test_call_external_void_func_v2i16_inreg:
9246; GFX11:       ; %bb.0:
9247; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9248; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9249; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9250; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9251; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9252; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
9253; GFX11-NEXT:    s_mov_b32 s33, s32
9254; GFX11-NEXT:    s_add_i32 s32, s32, 16
9255; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9256; GFX11-NEXT:    s_load_b32 s4, s[0:1], 0x0
9257; GFX11-NEXT:    s_getpc_b64 s[0:1]
9258; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
9259; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
9260; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
9261; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
9262; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9263; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9264; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
9265; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
9266; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9267; GFX11-NEXT:    s_add_i32 s32, s32, -16
9268; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
9269; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9270; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9271; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9272; GFX11-NEXT:    s_waitcnt vmcnt(0)
9273; GFX11-NEXT:    s_setpc_b64 s[30:31]
9274;
9275; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
9276; GFX10-SCRATCH:       ; %bb.0:
9277; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9278; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9279; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9280; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9281; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9282; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9283; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
9284; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9285; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9286; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9287; GFX10-SCRATCH-NEXT:    s_load_dword s4, s[0:1], 0x0
9288; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9289; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
9290; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
9291; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
9292; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
9293; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9294; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
9295; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
9296; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9297; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9298; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
9299; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9300; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9301; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9302; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9303; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9304; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9305  %val = load <2 x i16>, <2 x i16> addrspace(4)* undef
9306  call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
9307  ret void
9308}
9309
; Loads a <3 x i16> through an undef addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v3i16_inreg.
; The assertions below expect, for each checked llc configuration:
;   - the argument loaded with a 64-bit scalar load directly into s[4:5];
;   - the previous s4/s5 saved in lanes 0-1 of v40, the return address
;     s30/s31 in lanes 2-3, and s33 in lane 4, all restored after the call;
;   - v40 itself spilled to and reloaded from the stack at s32.
9310define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
9311; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
9312; GFX9:       ; %bb.0:
9313; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9314; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9315; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9316; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9317; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9318; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9319; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9320; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9321; GFX9-NEXT:    s_mov_b32 s33, s32
9322; GFX9-NEXT:    s_addk_i32 s32, 0x400
9323; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9324; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9325; GFX9-NEXT:    s_getpc_b64 s[34:35]
9326; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
9327; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
9328; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9329; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
9330; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
9331; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9332; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9333; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9334; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
9335; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9336; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9337; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9338; GFX9-NEXT:    s_waitcnt vmcnt(0)
9339; GFX9-NEXT:    s_setpc_b64 s[30:31]
9340;
9341; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
9342; GFX10:       ; %bb.0:
9343; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9344; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9345; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9346; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9347; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9348; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9349; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
9350; GFX10-NEXT:    s_mov_b32 s33, s32
9351; GFX10-NEXT:    s_addk_i32 s32, 0x200
9352; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9353; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9354; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9355; GFX10-NEXT:    s_getpc_b64 s[34:35]
9356; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
9357; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
9358; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
9359; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
9360; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9361; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
9362; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
9363; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9364; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9365; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9366; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
9367; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9368; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9369; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9370; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9371; GFX10-NEXT:    s_waitcnt vmcnt(0)
9372; GFX10-NEXT:    s_setpc_b64 s[30:31]
9373;
9374; GFX11-LABEL: test_call_external_void_func_v3i16_inreg:
9375; GFX11:       ; %bb.0:
9376; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9377; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9378; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9379; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9380; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9381; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
9382; GFX11-NEXT:    s_mov_b32 s33, s32
9383; GFX11-NEXT:    s_add_i32 s32, s32, 16
9384; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9385; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9386; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
9387; GFX11-NEXT:    s_getpc_b64 s[0:1]
9388; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
9389; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
9390; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
9391; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
9392; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9393; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9394; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
9395; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
9396; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9397; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9398; GFX11-NEXT:    s_add_i32 s32, s32, -16
9399; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
9400; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9401; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9402; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9403; GFX11-NEXT:    s_waitcnt vmcnt(0)
9404; GFX11-NEXT:    s_setpc_b64 s[30:31]
9405;
9406; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
9407; GFX10-SCRATCH:       ; %bb.0:
9408; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9409; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9410; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9411; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9412; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9413; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9414; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
9415; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9416; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9417; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9418; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9419; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
9420; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9421; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
9422; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
9423; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
9424; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
9425; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9426; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
9427; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
9428; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9429; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9430; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9431; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
9432; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9433; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9434; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9435; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9436; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9437; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9438  %val = load <3 x i16>, <3 x i16> addrspace(4)* undef
9439  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
9440  ret void
9441}
9442
; Loads a <3 x half> through an undef addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v3f16_inreg.
; The assertions below expect, for each checked llc configuration:
;   - the argument loaded with a 64-bit scalar load directly into s[4:5];
;   - the previous s4/s5 saved in lanes 0-1 of v40, the return address
;     s30/s31 in lanes 2-3, and s33 in lane 4, all restored after the call;
;   - v40 itself spilled to and reloaded from the stack at s32.
9443define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
9444; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
9445; GFX9:       ; %bb.0:
9446; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9447; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9448; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9449; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9450; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9451; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9452; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9453; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9454; GFX9-NEXT:    s_mov_b32 s33, s32
9455; GFX9-NEXT:    s_addk_i32 s32, 0x400
9456; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9457; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9458; GFX9-NEXT:    s_getpc_b64 s[34:35]
9459; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
9460; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
9461; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9462; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
9463; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
9464; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9465; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9466; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9467; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
9468; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9469; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9470; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9471; GFX9-NEXT:    s_waitcnt vmcnt(0)
9472; GFX9-NEXT:    s_setpc_b64 s[30:31]
9473;
9474; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
9475; GFX10:       ; %bb.0:
9476; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9477; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9478; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9479; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9480; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9481; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9482; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
9483; GFX10-NEXT:    s_mov_b32 s33, s32
9484; GFX10-NEXT:    s_addk_i32 s32, 0x200
9485; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9486; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9487; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9488; GFX10-NEXT:    s_getpc_b64 s[34:35]
9489; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
9490; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
9491; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
9492; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
9493; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9494; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
9495; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
9496; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9497; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9498; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9499; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
9500; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9501; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9502; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9503; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9504; GFX10-NEXT:    s_waitcnt vmcnt(0)
9505; GFX10-NEXT:    s_setpc_b64 s[30:31]
9506;
9507; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
9508; GFX11:       ; %bb.0:
9509; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9510; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9511; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9512; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9513; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9514; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
9515; GFX11-NEXT:    s_mov_b32 s33, s32
9516; GFX11-NEXT:    s_add_i32 s32, s32, 16
9517; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9518; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9519; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
9520; GFX11-NEXT:    s_getpc_b64 s[0:1]
9521; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
9522; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
9523; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
9524; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
9525; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9526; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9527; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
9528; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
9529; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9530; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9531; GFX11-NEXT:    s_add_i32 s32, s32, -16
9532; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
9533; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9534; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9535; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9536; GFX11-NEXT:    s_waitcnt vmcnt(0)
9537; GFX11-NEXT:    s_setpc_b64 s[30:31]
9538;
9539; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
9540; GFX10-SCRATCH:       ; %bb.0:
9541; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9542; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9543; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9544; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9545; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9546; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9547; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
9548; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9549; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9550; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9551; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9552; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
9553; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9554; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
9555; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
9556; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
9557; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
9558; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9559; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
9560; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
9561; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9562; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9563; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9564; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
9565; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9566; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9567; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9568; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9569; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9570; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9571  %val = load <3 x half>, <3 x half> addrspace(4)* undef
9572  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
9573  ret void
9574}
9575
; Passes the constant vector <i16 1, i16 2, i16 3> as an inreg argument to
; @external_void_func_v3i16_inreg.
; The assertions below expect the constant materialized with scalar moves as
; s4 = 0x20001 (element 1 in the low half, element 2 in the high half) and
; s5 = 3, with no load. The previous s4/s5 are saved in lanes 0-1 of v40, the
; return address s30/s31 in lanes 2-3, and s33 in lane 4; all are restored
; after the call.
9576define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
9577; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
9578; GFX9:       ; %bb.0:
9579; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9580; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9581; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9582; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9583; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9584; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9585; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9586; GFX9-NEXT:    s_mov_b32 s33, s32
9587; GFX9-NEXT:    s_addk_i32 s32, 0x400
9588; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9589; GFX9-NEXT:    s_mov_b32 s4, 0x20001
9590; GFX9-NEXT:    s_mov_b32 s5, 3
9591; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9592; GFX9-NEXT:    s_getpc_b64 s[34:35]
9593; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
9594; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
9595; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9596; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
9597; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
9598; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9599; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9600; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9601; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
9602; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9603; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9604; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9605; GFX9-NEXT:    s_waitcnt vmcnt(0)
9606; GFX9-NEXT:    s_setpc_b64 s[30:31]
9607;
9608; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
9609; GFX10:       ; %bb.0:
9610; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9611; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9612; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9613; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9614; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9615; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9616; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
9617; GFX10-NEXT:    s_mov_b32 s33, s32
9618; GFX10-NEXT:    s_addk_i32 s32, 0x200
9619; GFX10-NEXT:    s_getpc_b64 s[34:35]
9620; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
9621; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
9622; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9623; GFX10-NEXT:    s_mov_b32 s4, 0x20001
9624; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9625; GFX10-NEXT:    s_mov_b32 s5, 3
9626; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
9627; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
9628; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9629; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
9630; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
9631; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9632; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9633; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9634; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
9635; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9636; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9637; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9638; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9639; GFX10-NEXT:    s_waitcnt vmcnt(0)
9640; GFX10-NEXT:    s_setpc_b64 s[30:31]
9641;
9642; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg:
9643; GFX11:       ; %bb.0:
9644; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9645; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9646; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9647; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9648; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9649; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
9650; GFX11-NEXT:    s_mov_b32 s33, s32
9651; GFX11-NEXT:    s_add_i32 s32, s32, 16
9652; GFX11-NEXT:    s_getpc_b64 s[0:1]
9653; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
9654; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
9655; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9656; GFX11-NEXT:    s_mov_b32 s4, 0x20001
9657; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9658; GFX11-NEXT:    s_mov_b32 s5, 3
9659; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
9660; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
9661; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9662; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9663; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
9664; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
9665; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9666; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9667; GFX11-NEXT:    s_add_i32 s32, s32, -16
9668; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
9669; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9670; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9671; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9672; GFX11-NEXT:    s_waitcnt vmcnt(0)
9673; GFX11-NEXT:    s_setpc_b64 s[30:31]
9674;
9675; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
9676; GFX10-SCRATCH:       ; %bb.0:
9677; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9678; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9679; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9680; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9681; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9682; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9683; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
9684; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9685; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9686; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9687; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
9688; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
9689; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9690; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0x20001
9691; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9692; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 3
9693; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
9694; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
9695; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9696; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
9697; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
9698; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9699; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9700; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9701; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
9702; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9703; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9704; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9705; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9706; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9707; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9708  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
9709  ret void
9710}
9711
; Passes the constant vector <half 1.0, half 2.0, half 4.0> as an inreg
; argument to @external_void_func_v3f16_inreg.
; The assertions below expect the constant materialized with scalar moves as
; s4 = 0x40003c00 (1.0 = 0x3c00 in the low half, 2.0 = 0x4000 in the high
; half) and s5 = 0x4400 (4.0, emitted via s_movk_i32), with no load. The
; previous s4/s5 are saved in lanes 0-1 of v40, the return address s30/s31 in
; lanes 2-3, and s33 in lane 4; all are restored after the call.
9712define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
9713; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
9714; GFX9:       ; %bb.0:
9715; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9716; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9717; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9718; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9719; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9720; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9721; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9722; GFX9-NEXT:    s_mov_b32 s33, s32
9723; GFX9-NEXT:    s_addk_i32 s32, 0x400
9724; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9725; GFX9-NEXT:    s_mov_b32 s4, 0x40003c00
9726; GFX9-NEXT:    s_movk_i32 s5, 0x4400
9727; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9728; GFX9-NEXT:    s_getpc_b64 s[34:35]
9729; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
9730; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
9731; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9732; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
9733; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
9734; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9735; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9736; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9737; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
9738; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9739; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9740; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9741; GFX9-NEXT:    s_waitcnt vmcnt(0)
9742; GFX9-NEXT:    s_setpc_b64 s[30:31]
9743;
9744; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
9745; GFX10:       ; %bb.0:
9746; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9747; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9748; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9749; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9750; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9751; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9752; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
9753; GFX10-NEXT:    s_mov_b32 s33, s32
9754; GFX10-NEXT:    s_addk_i32 s32, 0x200
9755; GFX10-NEXT:    s_getpc_b64 s[34:35]
9756; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
9757; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
9758; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9759; GFX10-NEXT:    s_mov_b32 s4, 0x40003c00
9760; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9761; GFX10-NEXT:    s_movk_i32 s5, 0x4400
9762; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
9763; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
9764; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9765; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
9766; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
9767; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9768; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9769; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9770; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
9771; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9772; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9773; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9774; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9775; GFX10-NEXT:    s_waitcnt vmcnt(0)
9776; GFX10-NEXT:    s_setpc_b64 s[30:31]
9777;
9778; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg:
9779; GFX11:       ; %bb.0:
9780; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9781; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9782; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9783; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9784; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9785; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
9786; GFX11-NEXT:    s_mov_b32 s33, s32
9787; GFX11-NEXT:    s_add_i32 s32, s32, 16
9788; GFX11-NEXT:    s_getpc_b64 s[0:1]
9789; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
9790; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
9791; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9792; GFX11-NEXT:    s_mov_b32 s4, 0x40003c00
9793; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9794; GFX11-NEXT:    s_movk_i32 s5, 0x4400
9795; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
9796; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
9797; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9798; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9799; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
9800; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
9801; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9802; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9803; GFX11-NEXT:    s_add_i32 s32, s32, -16
9804; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
9805; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9806; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9807; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9808; GFX11-NEXT:    s_waitcnt vmcnt(0)
9809; GFX11-NEXT:    s_setpc_b64 s[30:31]
9810;
9811; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
9812; GFX10-SCRATCH:       ; %bb.0:
9813; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9814; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9815; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9816; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9817; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9818; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9819; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
9820; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9821; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9822; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9823; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
9824; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
9825; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9826; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0x40003c00
9827; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9828; GFX10-SCRATCH-NEXT:    s_movk_i32 s5, 0x4400
9829; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
9830; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
9831; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9832; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
9833; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
9834; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9835; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9836; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9837; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
9838; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9839; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9840; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9841; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9842; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9843; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9844  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
9845  ret void
9846}
9847
; Loads a <4 x i16> through an undef addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v4i16_inreg.
; The assertions below expect, for each checked llc configuration:
;   - the argument loaded with a 64-bit scalar load directly into s[4:5];
;   - the previous s4/s5 saved in lanes 0-1 of v40, the return address
;     s30/s31 in lanes 2-3, and s33 in lane 4, all restored after the call;
;   - v40 itself spilled to and reloaded from the stack at s32.
9848define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
9849; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
9850; GFX9:       ; %bb.0:
9851; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9852; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9853; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9854; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9855; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9856; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9857; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9858; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9859; GFX9-NEXT:    s_mov_b32 s33, s32
9860; GFX9-NEXT:    s_addk_i32 s32, 0x400
9861; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9862; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9863; GFX9-NEXT:    s_getpc_b64 s[34:35]
9864; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
9865; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
9866; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9867; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
9868; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
9869; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
9870; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
9871; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
9872; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
9873; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9874; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9875; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9876; GFX9-NEXT:    s_waitcnt vmcnt(0)
9877; GFX9-NEXT:    s_setpc_b64 s[30:31]
9878;
9879; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
9880; GFX10:       ; %bb.0:
9881; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9882; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
9883; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9884; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9885; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9886; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9887; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
9888; GFX10-NEXT:    s_mov_b32 s33, s32
9889; GFX10-NEXT:    s_addk_i32 s32, 0x200
9890; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
9891; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
9892; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
9893; GFX10-NEXT:    s_getpc_b64 s[34:35]
9894; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
9895; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
9896; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
9897; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
9898; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
9899; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
9900; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
9901; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
9902; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
9903; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
9904; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
9905; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
9906; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
9907; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
9908; GFX10-NEXT:    s_mov_b32 exec_lo, s34
9909; GFX10-NEXT:    s_waitcnt vmcnt(0)
9910; GFX10-NEXT:    s_setpc_b64 s[30:31]
9911;
9912; GFX11-LABEL: test_call_external_void_func_v4i16_inreg:
9913; GFX11:       ; %bb.0:
9914; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9915; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
9916; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9917; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
9918; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9919; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
9920; GFX11-NEXT:    s_mov_b32 s33, s32
9921; GFX11-NEXT:    s_add_i32 s32, s32, 16
9922; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
9923; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
9924; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
9925; GFX11-NEXT:    s_getpc_b64 s[0:1]
9926; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
9927; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
9928; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
9929; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
9930; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9931; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9932; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
9933; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
9934; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
9935; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
9936; GFX11-NEXT:    s_add_i32 s32, s32, -16
9937; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
9938; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
9939; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
9940; GFX11-NEXT:    s_mov_b32 exec_lo, s0
9941; GFX11-NEXT:    s_waitcnt vmcnt(0)
9942; GFX11-NEXT:    s_setpc_b64 s[30:31]
9943;
9944; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
9945; GFX10-SCRATCH:       ; %bb.0:
9946; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9947; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
9948; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9949; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
9950; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9951; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9952; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
9953; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
9954; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
9955; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
9956; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
9957; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
9958; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
9959; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
9960; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
9961; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
9962; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
9963; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
9964; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
9965; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
9966; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
9967; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
9968; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
9969; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
9970; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
9971; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
9972; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
9973; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
9974; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
9975; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
9976  %val = load <4 x i16>, <4 x i16> addrspace(4)* undef
9977  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
9978  ret void
9979}
9980
; Passes the constant vector <i16 1, i16 2, i16 3, i16 4> as an inreg argument
; to @external_void_func_v4i16_inreg. The expected code for every run line
; materializes the two packed dwords in s4 (0x20001) and s5 (0x40003), after
; saving the incoming s4/s5 together with s30, s31 and s33 into lanes 0-4 of
; v40, and reads those lanes back after the call.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
9981define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
9982; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
9983; GFX9:       ; %bb.0:
9984; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9985; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
9986; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
9987; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
9988; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
9989; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
9990; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
9991; GFX9-NEXT:    s_mov_b32 s33, s32
9992; GFX9-NEXT:    s_addk_i32 s32, 0x400
9993; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
9994; GFX9-NEXT:    s_mov_b32 s4, 0x20001
9995; GFX9-NEXT:    s_mov_b32 s5, 0x40003
9996; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
9997; GFX9-NEXT:    s_getpc_b64 s[34:35]
9998; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
9999; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
10000; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10001; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
10002; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
10003; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10004; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10005; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10006; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
10007; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10008; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10009; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10010; GFX9-NEXT:    s_waitcnt vmcnt(0)
10011; GFX9-NEXT:    s_setpc_b64 s[30:31]
10012;
10013; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
10014; GFX10:       ; %bb.0:
10015; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10016; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10017; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10018; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10019; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10020; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10021; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
10022; GFX10-NEXT:    s_mov_b32 s33, s32
10023; GFX10-NEXT:    s_addk_i32 s32, 0x200
10024; GFX10-NEXT:    s_getpc_b64 s[34:35]
10025; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
10026; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
10027; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10028; GFX10-NEXT:    s_mov_b32 s4, 0x20001
10029; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10030; GFX10-NEXT:    s_mov_b32 s5, 0x40003
10031; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
10032; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
10033; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10034; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
10035; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
10036; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10037; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10038; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10039; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
10040; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10041; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10042; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10043; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10044; GFX10-NEXT:    s_waitcnt vmcnt(0)
10045; GFX10-NEXT:    s_setpc_b64 s[30:31]
10046;
10047; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg:
10048; GFX11:       ; %bb.0:
10049; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10050; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10051; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10052; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10053; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10054; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
10055; GFX11-NEXT:    s_mov_b32 s33, s32
10056; GFX11-NEXT:    s_add_i32 s32, s32, 16
10057; GFX11-NEXT:    s_getpc_b64 s[0:1]
10058; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
10059; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
10060; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10061; GFX11-NEXT:    s_mov_b32 s4, 0x20001
10062; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10063; GFX11-NEXT:    s_mov_b32 s5, 0x40003
10064; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
10065; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
10066; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10067; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10068; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
10069; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
10070; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10071; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10072; GFX11-NEXT:    s_add_i32 s32, s32, -16
10073; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
10074; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10075; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10076; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10077; GFX11-NEXT:    s_waitcnt vmcnt(0)
10078; GFX11-NEXT:    s_setpc_b64 s[30:31]
10079;
10080; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
10081; GFX10-SCRATCH:       ; %bb.0:
10082; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10083; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10084; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10085; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10086; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10087; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10088; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
10089; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10090; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10091; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10092; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
10093; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
10094; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10095; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 0x20001
10096; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10097; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 0x40003
10098; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
10099; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
10100; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10101; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
10102; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
10103; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10104; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10105; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10106; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
10107; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10108; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10109; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10110; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10111; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10112; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10113  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
10114  ret void
10115}
10116
; Loads a <2 x half> through an undef addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v2f16_inreg. The expected code for every
; run line performs a single one-dword scalar load into s4, after saving the
; incoming s4 into lane 0 of v40; s30, s31 and s33 go to lanes 1-3, and all four
; lanes are read back after the call.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
10117define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
10118; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
10119; GFX9:       ; %bb.0:
10120; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10121; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10122; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10123; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10124; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
10125; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10126; GFX9-NEXT:    s_load_dword s4, s[34:35], 0x0
10127; GFX9-NEXT:    s_mov_b32 s33, s32
10128; GFX9-NEXT:    s_addk_i32 s32, 0x400
10129; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
10130; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
10131; GFX9-NEXT:    s_getpc_b64 s[34:35]
10132; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
10133; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
10134; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10135; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
10136; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
10137; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10138; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10139; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
10140; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10141; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10142; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10143; GFX9-NEXT:    s_waitcnt vmcnt(0)
10144; GFX9-NEXT:    s_setpc_b64 s[30:31]
10145;
10146; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
10147; GFX10:       ; %bb.0:
10148; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10149; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10150; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10151; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10152; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10153; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10154; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
10155; GFX10-NEXT:    s_mov_b32 s33, s32
10156; GFX10-NEXT:    s_addk_i32 s32, 0x200
10157; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10158; GFX10-NEXT:    s_load_dword s4, s[34:35], 0x0
10159; GFX10-NEXT:    s_getpc_b64 s[34:35]
10160; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
10161; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
10162; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
10163; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
10164; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10165; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
10166; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
10167; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10168; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10169; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
10170; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10171; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10172; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10173; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10174; GFX10-NEXT:    s_waitcnt vmcnt(0)
10175; GFX10-NEXT:    s_setpc_b64 s[30:31]
10176;
10177; GFX11-LABEL: test_call_external_void_func_v2f16_inreg:
10178; GFX11:       ; %bb.0:
10179; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10180; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10181; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10182; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10183; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10184; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
10185; GFX11-NEXT:    s_mov_b32 s33, s32
10186; GFX11-NEXT:    s_add_i32 s32, s32, 16
10187; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10188; GFX11-NEXT:    s_load_b32 s4, s[0:1], 0x0
10189; GFX11-NEXT:    s_getpc_b64 s[0:1]
10190; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
10191; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
10192; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
10193; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
10194; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10195; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10196; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
10197; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
10198; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10199; GFX11-NEXT:    s_add_i32 s32, s32, -16
10200; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
10201; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10202; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10203; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10204; GFX11-NEXT:    s_waitcnt vmcnt(0)
10205; GFX11-NEXT:    s_setpc_b64 s[30:31]
10206;
10207; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
10208; GFX10-SCRATCH:       ; %bb.0:
10209; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10210; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10211; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10212; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10213; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10214; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10215; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 3
10216; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10217; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10218; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10219; GFX10-SCRATCH-NEXT:    s_load_dword s4, s[0:1], 0x0
10220; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10221; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
10222; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
10223; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 1
10224; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 2
10225; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10226; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 2
10227; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 1
10228; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10229; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10230; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 3
10231; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10232; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10233; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10234; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10235; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10236; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10237  %val = load <2 x half>, <2 x half> addrspace(4)* undef
10238  call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
10239  ret void
10240}
10241
; Loads a <2 x i32> through an undef addrspace(4) pointer and passes it as an
; inreg argument to @external_void_func_v2i32_inreg. The expected code for every
; run line performs one two-dword scalar load into s[4:5], after saving the
; incoming s4/s5 into lanes 0-1 of v40; s30, s31 and s33 go to lanes 2-4, and
; all five lanes are read back after the call.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
10242define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
10243; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
10244; GFX9:       ; %bb.0:
10245; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10246; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10247; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10248; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10249; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
10250; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10251; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10252; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
10253; GFX9-NEXT:    s_mov_b32 s33, s32
10254; GFX9-NEXT:    s_addk_i32 s32, 0x400
10255; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
10256; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
10257; GFX9-NEXT:    s_getpc_b64 s[34:35]
10258; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
10259; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
10260; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10261; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
10262; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
10263; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10264; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10265; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10266; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
10267; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10268; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10269; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10270; GFX9-NEXT:    s_waitcnt vmcnt(0)
10271; GFX9-NEXT:    s_setpc_b64 s[30:31]
10272;
10273; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
10274; GFX10:       ; %bb.0:
10275; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10276; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10277; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10278; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10279; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10280; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10281; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
10282; GFX10-NEXT:    s_mov_b32 s33, s32
10283; GFX10-NEXT:    s_addk_i32 s32, 0x200
10284; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10285; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10286; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[34:35], 0x0
10287; GFX10-NEXT:    s_getpc_b64 s[34:35]
10288; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
10289; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
10290; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
10291; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
10292; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10293; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
10294; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
10295; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10296; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10297; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10298; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
10299; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10300; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10301; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10302; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10303; GFX10-NEXT:    s_waitcnt vmcnt(0)
10304; GFX10-NEXT:    s_setpc_b64 s[30:31]
10305;
10306; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
10307; GFX11:       ; %bb.0:
10308; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10309; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10310; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10311; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10312; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10313; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
10314; GFX11-NEXT:    s_mov_b32 s33, s32
10315; GFX11-NEXT:    s_add_i32 s32, s32, 16
10316; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10317; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10318; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
10319; GFX11-NEXT:    s_getpc_b64 s[0:1]
10320; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
10321; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
10322; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
10323; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
10324; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10325; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10326; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
10327; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
10328; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10329; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10330; GFX11-NEXT:    s_add_i32 s32, s32, -16
10331; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
10332; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10333; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10334; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10335; GFX11-NEXT:    s_waitcnt vmcnt(0)
10336; GFX11-NEXT:    s_setpc_b64 s[30:31]
10337;
10338; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
10339; GFX10-SCRATCH:       ; %bb.0:
10340; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10341; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10342; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10343; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10344; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10345; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10346; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
10347; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10348; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10349; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10350; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10351; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
10352; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10353; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
10354; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
10355; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
10356; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
10357; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10358; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
10359; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
10360; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10361; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10362; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10363; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
10364; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10365; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10366; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10367; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10368; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10369; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10370  %val = load <2 x i32>, <2 x i32> addrspace(4)* undef
10371  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
10372  ret void
10373}
10374
; Passes the constant vector <i32 1, i32 2> as an inreg argument to
; @external_void_func_v2i32_inreg. The expected code for every run line moves
; the immediates 1 and 2 into s4 and s5, after saving the incoming s4/s5
; together with s30, s31 and s33 into lanes 0-4 of v40, and reads those lanes
; back after the call.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
10375define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
10376; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
10377; GFX9:       ; %bb.0:
10378; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10379; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10380; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10381; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10382; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
10383; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10384; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10385; GFX9-NEXT:    s_mov_b32 s33, s32
10386; GFX9-NEXT:    s_addk_i32 s32, 0x400
10387; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
10388; GFX9-NEXT:    s_mov_b32 s4, 1
10389; GFX9-NEXT:    s_mov_b32 s5, 2
10390; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
10391; GFX9-NEXT:    s_getpc_b64 s[34:35]
10392; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
10393; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
10394; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10395; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
10396; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
10397; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10398; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10399; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10400; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
10401; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10402; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10403; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10404; GFX9-NEXT:    s_waitcnt vmcnt(0)
10405; GFX9-NEXT:    s_setpc_b64 s[30:31]
10406;
10407; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
10408; GFX10:       ; %bb.0:
10409; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10410; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10411; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10412; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10413; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10414; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10415; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
10416; GFX10-NEXT:    s_mov_b32 s33, s32
10417; GFX10-NEXT:    s_addk_i32 s32, 0x200
10418; GFX10-NEXT:    s_getpc_b64 s[34:35]
10419; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
10420; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
10421; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10422; GFX10-NEXT:    s_mov_b32 s4, 1
10423; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10424; GFX10-NEXT:    s_mov_b32 s5, 2
10425; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
10426; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
10427; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10428; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
10429; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
10430; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10431; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10432; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10433; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
10434; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10435; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10436; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10437; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10438; GFX10-NEXT:    s_waitcnt vmcnt(0)
10439; GFX10-NEXT:    s_setpc_b64 s[30:31]
10440;
10441; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg:
10442; GFX11:       ; %bb.0:
10443; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10444; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10445; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10446; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10447; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10448; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
10449; GFX11-NEXT:    s_mov_b32 s33, s32
10450; GFX11-NEXT:    s_add_i32 s32, s32, 16
10451; GFX11-NEXT:    s_getpc_b64 s[0:1]
10452; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
10453; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
10454; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10455; GFX11-NEXT:    s_mov_b32 s4, 1
10456; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10457; GFX11-NEXT:    s_mov_b32 s5, 2
10458; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
10459; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
10460; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10461; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10462; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
10463; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
10464; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10465; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10466; GFX11-NEXT:    s_add_i32 s32, s32, -16
10467; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
10468; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10469; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10470; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10471; GFX11-NEXT:    s_waitcnt vmcnt(0)
10472; GFX11-NEXT:    s_setpc_b64 s[30:31]
10473;
10474; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
10475; GFX10-SCRATCH:       ; %bb.0:
10476; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10477; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10478; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10479; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10480; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10481; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10482; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 4
10483; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10484; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10485; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10486; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
10487; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
10488; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10489; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1
10490; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10491; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2
10492; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 2
10493; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 3
10494; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10495; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 3
10496; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 2
10497; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10498; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10499; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10500; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 4
10501; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10502; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10503; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10504; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10505; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10506; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10507  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
10508  ret void
10509}
10510
10511define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
10512; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
10513; GFX9:       ; %bb.0:
10514; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10515; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10516; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10517; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10518; GFX9-NEXT:    v_writelane_b32 v40, s33, 5
10519; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10520; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10521; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
10522; GFX9-NEXT:    s_mov_b32 s33, s32
10523; GFX9-NEXT:    s_addk_i32 s32, 0x400
10524; GFX9-NEXT:    v_writelane_b32 v40, s30, 3
10525; GFX9-NEXT:    s_mov_b32 s4, 3
10526; GFX9-NEXT:    s_mov_b32 s5, 4
10527; GFX9-NEXT:    s_mov_b32 s6, 5
10528; GFX9-NEXT:    v_writelane_b32 v40, s31, 4
10529; GFX9-NEXT:    s_getpc_b64 s[34:35]
10530; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
10531; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
10532; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10533; GFX9-NEXT:    v_readlane_b32 s31, v40, 4
10534; GFX9-NEXT:    v_readlane_b32 s30, v40, 3
10535; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
10536; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10537; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10538; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10539; GFX9-NEXT:    v_readlane_b32 s33, v40, 5
10540; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10541; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10542; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10543; GFX9-NEXT:    s_waitcnt vmcnt(0)
10544; GFX9-NEXT:    s_setpc_b64 s[30:31]
10545;
10546; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
10547; GFX10:       ; %bb.0:
10548; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10549; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10550; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10551; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10552; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10553; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10554; GFX10-NEXT:    v_writelane_b32 v40, s33, 5
10555; GFX10-NEXT:    s_mov_b32 s33, s32
10556; GFX10-NEXT:    s_addk_i32 s32, 0x200
10557; GFX10-NEXT:    s_getpc_b64 s[34:35]
10558; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
10559; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
10560; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10561; GFX10-NEXT:    s_mov_b32 s4, 3
10562; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10563; GFX10-NEXT:    s_mov_b32 s5, 4
10564; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
10565; GFX10-NEXT:    s_mov_b32 s6, 5
10566; GFX10-NEXT:    v_writelane_b32 v40, s30, 3
10567; GFX10-NEXT:    v_writelane_b32 v40, s31, 4
10568; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10569; GFX10-NEXT:    v_readlane_b32 s31, v40, 4
10570; GFX10-NEXT:    v_readlane_b32 s30, v40, 3
10571; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
10572; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10573; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10574; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10575; GFX10-NEXT:    v_readlane_b32 s33, v40, 5
10576; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10577; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10578; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10579; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10580; GFX10-NEXT:    s_waitcnt vmcnt(0)
10581; GFX10-NEXT:    s_setpc_b64 s[30:31]
10582;
10583; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg:
10584; GFX11:       ; %bb.0:
10585; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10586; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10587; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10588; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10589; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10590; GFX11-NEXT:    v_writelane_b32 v40, s33, 5
10591; GFX11-NEXT:    s_mov_b32 s33, s32
10592; GFX11-NEXT:    s_add_i32 s32, s32, 16
10593; GFX11-NEXT:    s_getpc_b64 s[0:1]
10594; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
10595; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
10596; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10597; GFX11-NEXT:    s_mov_b32 s4, 3
10598; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10599; GFX11-NEXT:    s_mov_b32 s5, 4
10600; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
10601; GFX11-NEXT:    s_mov_b32 s6, 5
10602; GFX11-NEXT:    v_writelane_b32 v40, s30, 3
10603; GFX11-NEXT:    v_writelane_b32 v40, s31, 4
10604; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10605; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10606; GFX11-NEXT:    v_readlane_b32 s31, v40, 4
10607; GFX11-NEXT:    v_readlane_b32 s30, v40, 3
10608; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
10609; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10610; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10611; GFX11-NEXT:    s_add_i32 s32, s32, -16
10612; GFX11-NEXT:    v_readlane_b32 s33, v40, 5
10613; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10614; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10615; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10616; GFX11-NEXT:    s_waitcnt vmcnt(0)
10617; GFX11-NEXT:    s_setpc_b64 s[30:31]
10618;
10619; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
10620; GFX10-SCRATCH:       ; %bb.0:
10621; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10622; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10623; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10624; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10625; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10626; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10627; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 5
10628; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10629; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10630; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10631; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
10632; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
10633; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10634; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 3
10635; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10636; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 4
10637; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
10638; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 5
10639; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 3
10640; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 4
10641; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10642; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 4
10643; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 3
10644; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
10645; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10646; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10647; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10648; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 5
10649; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10650; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10651; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10652; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10653; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10654; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10655  call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
10656  ret void
10657}
10658
; Calls external_void_func_v3i32_i32_inreg with two constant inreg arguments,
; the vector <3, 4, 5> and the scalar 6. In every run configuration the checks
; below expect the four values to be materialized with s_mov_b32 into s4-s7,
; with the incoming s4-s7 written to lanes 0-3 of v40 before the call and read
; back afterwards (s30/s31 use lanes 4-5 and s33 uses lane 6).
; The function's own unnamed i32 parameter is not referenced by the body.
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
10659define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
10660; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
10661; GFX9:       ; %bb.0:
10662; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10663; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10664; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10665; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10666; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
10667; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10668; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10669; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
10670; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
10671; GFX9-NEXT:    s_mov_b32 s33, s32
10672; GFX9-NEXT:    s_addk_i32 s32, 0x400
10673; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
10674; GFX9-NEXT:    s_mov_b32 s4, 3
10675; GFX9-NEXT:    s_mov_b32 s5, 4
10676; GFX9-NEXT:    s_mov_b32 s6, 5
10677; GFX9-NEXT:    s_mov_b32 s7, 6
10678; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
10679; GFX9-NEXT:    s_getpc_b64 s[34:35]
10680; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
10681; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
10682; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10683; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
10684; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
10685; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
10686; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
10687; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10688; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10689; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10690; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
10691; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10692; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10693; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10694; GFX9-NEXT:    s_waitcnt vmcnt(0)
10695; GFX9-NEXT:    s_setpc_b64 s[30:31]
10696;
10697; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
10698; GFX10:       ; %bb.0:
10699; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10700; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10701; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10702; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10703; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10704; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10705; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
10706; GFX10-NEXT:    s_mov_b32 s33, s32
10707; GFX10-NEXT:    s_addk_i32 s32, 0x200
10708; GFX10-NEXT:    s_getpc_b64 s[34:35]
10709; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
10710; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
10711; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10712; GFX10-NEXT:    s_mov_b32 s4, 3
10713; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10714; GFX10-NEXT:    s_mov_b32 s5, 4
10715; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
10716; GFX10-NEXT:    s_mov_b32 s6, 5
10717; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
10718; GFX10-NEXT:    s_mov_b32 s7, 6
10719; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
10720; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
10721; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10722; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
10723; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
10724; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
10725; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
10726; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10727; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10728; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10729; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
10730; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10731; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10732; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10733; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10734; GFX10-NEXT:    s_waitcnt vmcnt(0)
10735; GFX10-NEXT:    s_setpc_b64 s[30:31]
10736;
10737; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg:
10738; GFX11:       ; %bb.0:
10739; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10740; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10741; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10742; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10743; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10744; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
10745; GFX11-NEXT:    s_mov_b32 s33, s32
10746; GFX11-NEXT:    s_add_i32 s32, s32, 16
10747; GFX11-NEXT:    s_getpc_b64 s[0:1]
10748; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
10749; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
10750; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10751; GFX11-NEXT:    s_mov_b32 s4, 3
10752; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10753; GFX11-NEXT:    s_mov_b32 s5, 4
10754; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
10755; GFX11-NEXT:    s_mov_b32 s6, 5
10756; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
10757; GFX11-NEXT:    s_mov_b32 s7, 6
10758; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
10759; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
10760; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10761; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10762; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
10763; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
10764; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
10765; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
10766; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10767; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10768; GFX11-NEXT:    s_add_i32 s32, s32, -16
10769; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
10770; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10771; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10772; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10773; GFX11-NEXT:    s_waitcnt vmcnt(0)
10774; GFX11-NEXT:    s_setpc_b64 s[30:31]
10775;
10776; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
10777; GFX10-SCRATCH:       ; %bb.0:
10778; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10779; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10780; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10781; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10782; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10783; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10784; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
10785; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10786; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10787; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10788; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
10789; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
10790; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10791; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 3
10792; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10793; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 4
10794; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
10795; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 5
10796; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
10797; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 6
10798; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
10799; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
10800; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10801; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
10802; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
10803; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
10804; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
10805; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10806; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10807; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10808; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
10809; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10810; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10811; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10812; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10813; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10814; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10815  call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
10816  ret void
10817}
10818
; Loads a <4 x i32> through an undef addrspace(4) pointer and passes the loaded
; value as a single inreg argument to external_void_func_v4i32_inreg.
; The checks below expect one 128-bit scalar load (s_load_dwordx4, or
; s_load_b128 in the gfx1100 run) straight into s[4:7], issued after the
; incoming s4-s7 have been written to lanes 0-3 of v40; those lanes are read
; back after the call.
; NOTE(review): the load address is undef, so presumably only the argument
; lowering matters here and not the address registers shown in the checks.
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
10819define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
10820; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
10821; GFX9:       ; %bb.0:
10822; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10823; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10824; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10825; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10826; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
10827; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10828; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10829; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
10830; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
10831; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
10832; GFX9-NEXT:    s_mov_b32 s33, s32
10833; GFX9-NEXT:    s_addk_i32 s32, 0x400
10834; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
10835; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
10836; GFX9-NEXT:    s_getpc_b64 s[34:35]
10837; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
10838; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
10839; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10840; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
10841; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
10842; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
10843; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
10844; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10845; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10846; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10847; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
10848; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10849; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10850; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10851; GFX9-NEXT:    s_waitcnt vmcnt(0)
10852; GFX9-NEXT:    s_setpc_b64 s[30:31]
10853;
10854; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
10855; GFX10:       ; %bb.0:
10856; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10857; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
10858; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10859; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10860; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10861; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10862; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
10863; GFX10-NEXT:    s_mov_b32 s33, s32
10864; GFX10-NEXT:    s_addk_i32 s32, 0x200
10865; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
10866; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
10867; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
10868; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
10869; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[34:35], 0x0
10870; GFX10-NEXT:    s_getpc_b64 s[34:35]
10871; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
10872; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
10873; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
10874; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
10875; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10876; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
10877; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
10878; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
10879; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
10880; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
10881; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
10882; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
10883; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
10884; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
10885; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
10886; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
10887; GFX10-NEXT:    s_mov_b32 exec_lo, s34
10888; GFX10-NEXT:    s_waitcnt vmcnt(0)
10889; GFX10-NEXT:    s_setpc_b64 s[30:31]
10890;
10891; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
10892; GFX11:       ; %bb.0:
10893; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10894; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
10895; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10896; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
10897; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10898; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
10899; GFX11-NEXT:    s_mov_b32 s33, s32
10900; GFX11-NEXT:    s_add_i32 s32, s32, 16
10901; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
10902; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
10903; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
10904; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
10905; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x0
10906; GFX11-NEXT:    s_getpc_b64 s[0:1]
10907; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
10908; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
10909; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
10910; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
10911; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10912; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
10913; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
10914; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
10915; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
10916; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
10917; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
10918; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
10919; GFX11-NEXT:    s_add_i32 s32, s32, -16
10920; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
10921; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
10922; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
10923; GFX11-NEXT:    s_mov_b32 exec_lo, s0
10924; GFX11-NEXT:    s_waitcnt vmcnt(0)
10925; GFX11-NEXT:    s_setpc_b64 s[30:31]
10926;
10927; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
10928; GFX10-SCRATCH:       ; %bb.0:
10929; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10930; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
10931; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10932; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
10933; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10934; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10935; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
10936; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
10937; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
10938; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
10939; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
10940; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
10941; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
10942; GFX10-SCRATCH-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
10943; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
10944; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
10945; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
10946; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
10947; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
10948; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
10949; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
10950; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
10951; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
10952; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
10953; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
10954; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
10955; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
10956; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
10957; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
10958; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
10959; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
10960; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
10961; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
10962; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
10963  %val = load <4 x i32>, <4 x i32> addrspace(4)* undef
10964  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
10965  ret void
10966}
10967
; Passes the constant vector <1, 2, 3, 4> as a single inreg argument to
; external_void_func_v4i32_inreg. In every run configuration the checks below
; expect the four elements to be materialized with s_mov_b32 into s4-s7, with
; the incoming s4-s7 written to lanes 0-3 of v40 before the call and read back
; afterwards (s30/s31 use lanes 4-5 and s33 uses lane 6).
; The check lines are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
10968define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
10969; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
10970; GFX9:       ; %bb.0:
10971; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10972; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
10973; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
10974; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
10975; GFX9-NEXT:    v_writelane_b32 v40, s33, 6
10976; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
10977; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
10978; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
10979; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
10980; GFX9-NEXT:    s_mov_b32 s33, s32
10981; GFX9-NEXT:    s_addk_i32 s32, 0x400
10982; GFX9-NEXT:    v_writelane_b32 v40, s30, 4
10983; GFX9-NEXT:    s_mov_b32 s4, 1
10984; GFX9-NEXT:    s_mov_b32 s5, 2
10985; GFX9-NEXT:    s_mov_b32 s6, 3
10986; GFX9-NEXT:    s_mov_b32 s7, 4
10987; GFX9-NEXT:    v_writelane_b32 v40, s31, 5
10988; GFX9-NEXT:    s_getpc_b64 s[34:35]
10989; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
10990; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
10991; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
10992; GFX9-NEXT:    v_readlane_b32 s31, v40, 5
10993; GFX9-NEXT:    v_readlane_b32 s30, v40, 4
10994; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
10995; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
10996; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
10997; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
10998; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
10999; GFX9-NEXT:    v_readlane_b32 s33, v40, 6
11000; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11001; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11002; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11003; GFX9-NEXT:    s_waitcnt vmcnt(0)
11004; GFX9-NEXT:    s_setpc_b64 s[30:31]
11005;
11006; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
11007; GFX10:       ; %bb.0:
11008; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11009; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
11010; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11011; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11012; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11013; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11014; GFX10-NEXT:    v_writelane_b32 v40, s33, 6
11015; GFX10-NEXT:    s_mov_b32 s33, s32
11016; GFX10-NEXT:    s_addk_i32 s32, 0x200
11017; GFX10-NEXT:    s_getpc_b64 s[34:35]
11018; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
11019; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
11020; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
11021; GFX10-NEXT:    s_mov_b32 s4, 1
11022; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
11023; GFX10-NEXT:    s_mov_b32 s5, 2
11024; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
11025; GFX10-NEXT:    s_mov_b32 s6, 3
11026; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
11027; GFX10-NEXT:    s_mov_b32 s7, 4
11028; GFX10-NEXT:    v_writelane_b32 v40, s30, 4
11029; GFX10-NEXT:    v_writelane_b32 v40, s31, 5
11030; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11031; GFX10-NEXT:    v_readlane_b32 s31, v40, 5
11032; GFX10-NEXT:    v_readlane_b32 s30, v40, 4
11033; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
11034; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
11035; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
11036; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
11037; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
11038; GFX10-NEXT:    v_readlane_b32 s33, v40, 6
11039; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11040; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11041; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11042; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11043; GFX10-NEXT:    s_waitcnt vmcnt(0)
11044; GFX10-NEXT:    s_setpc_b64 s[30:31]
11045;
11046; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg:
11047; GFX11:       ; %bb.0:
11048; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11049; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
11050; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11051; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
11052; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11053; GFX11-NEXT:    v_writelane_b32 v40, s33, 6
11054; GFX11-NEXT:    s_mov_b32 s33, s32
11055; GFX11-NEXT:    s_add_i32 s32, s32, 16
11056; GFX11-NEXT:    s_getpc_b64 s[0:1]
11057; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
11058; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
11059; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
11060; GFX11-NEXT:    s_mov_b32 s4, 1
11061; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
11062; GFX11-NEXT:    s_mov_b32 s5, 2
11063; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
11064; GFX11-NEXT:    s_mov_b32 s6, 3
11065; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
11066; GFX11-NEXT:    s_mov_b32 s7, 4
11067; GFX11-NEXT:    v_writelane_b32 v40, s30, 4
11068; GFX11-NEXT:    v_writelane_b32 v40, s31, 5
11069; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11070; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
11071; GFX11-NEXT:    v_readlane_b32 s31, v40, 5
11072; GFX11-NEXT:    v_readlane_b32 s30, v40, 4
11073; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
11074; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
11075; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
11076; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
11077; GFX11-NEXT:    s_add_i32 s32, s32, -16
11078; GFX11-NEXT:    v_readlane_b32 s33, v40, 6
11079; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11080; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
11081; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11082; GFX11-NEXT:    s_waitcnt vmcnt(0)
11083; GFX11-NEXT:    s_setpc_b64 s[30:31]
11084;
11085; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
11086; GFX10-SCRATCH:       ; %bb.0:
11087; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11088; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
11089; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11090; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
11091; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11092; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11093; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 6
11094; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
11095; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
11096; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
11097; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
11098; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
11099; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
11100; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1
11101; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
11102; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2
11103; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
11104; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 3
11105; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
11106; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 4
11107; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 4
11108; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 5
11109; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11110; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 5
11111; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 4
11112; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
11113; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
11114; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
11115; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
11116; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
11117; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 6
11118; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11119; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
11120; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11121; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11122; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
11123; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
11124  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
11125  ret void
11126}
11127
11128define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
11129; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
11130; GFX9:       ; %bb.0:
11131; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11132; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11133; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11134; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11135; GFX9-NEXT:    v_writelane_b32 v40, s33, 7
11136; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
11137; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
11138; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
11139; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
11140; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
11141; GFX9-NEXT:    s_mov_b32 s33, s32
11142; GFX9-NEXT:    s_addk_i32 s32, 0x400
11143; GFX9-NEXT:    v_writelane_b32 v40, s30, 5
11144; GFX9-NEXT:    s_mov_b32 s4, 1
11145; GFX9-NEXT:    s_mov_b32 s5, 2
11146; GFX9-NEXT:    s_mov_b32 s6, 3
11147; GFX9-NEXT:    s_mov_b32 s7, 4
11148; GFX9-NEXT:    s_mov_b32 s8, 5
11149; GFX9-NEXT:    v_writelane_b32 v40, s31, 6
11150; GFX9-NEXT:    s_getpc_b64 s[34:35]
11151; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
11152; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
11153; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11154; GFX9-NEXT:    v_readlane_b32 s31, v40, 6
11155; GFX9-NEXT:    v_readlane_b32 s30, v40, 5
11156; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
11157; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
11158; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
11159; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
11160; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
11161; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
11162; GFX9-NEXT:    v_readlane_b32 s33, v40, 7
11163; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11164; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11165; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11166; GFX9-NEXT:    s_waitcnt vmcnt(0)
11167; GFX9-NEXT:    s_setpc_b64 s[30:31]
11168;
11169; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
11170; GFX10:       ; %bb.0:
11171; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11172; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
11173; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11174; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11175; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11176; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11177; GFX10-NEXT:    v_writelane_b32 v40, s33, 7
11178; GFX10-NEXT:    s_mov_b32 s33, s32
11179; GFX10-NEXT:    s_addk_i32 s32, 0x200
11180; GFX10-NEXT:    s_getpc_b64 s[34:35]
11181; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
11182; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
11183; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
11184; GFX10-NEXT:    s_mov_b32 s4, 1
11185; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
11186; GFX10-NEXT:    s_mov_b32 s5, 2
11187; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
11188; GFX10-NEXT:    s_mov_b32 s6, 3
11189; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
11190; GFX10-NEXT:    s_mov_b32 s7, 4
11191; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
11192; GFX10-NEXT:    s_mov_b32 s8, 5
11193; GFX10-NEXT:    v_writelane_b32 v40, s30, 5
11194; GFX10-NEXT:    v_writelane_b32 v40, s31, 6
11195; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11196; GFX10-NEXT:    v_readlane_b32 s31, v40, 6
11197; GFX10-NEXT:    v_readlane_b32 s30, v40, 5
11198; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
11199; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
11200; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
11201; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
11202; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
11203; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
11204; GFX10-NEXT:    v_readlane_b32 s33, v40, 7
11205; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11206; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11207; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11208; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11209; GFX10-NEXT:    s_waitcnt vmcnt(0)
11210; GFX10-NEXT:    s_setpc_b64 s[30:31]
11211;
11212; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg:
11213; GFX11:       ; %bb.0:
11214; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11215; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
11216; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11217; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
11218; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11219; GFX11-NEXT:    v_writelane_b32 v40, s33, 7
11220; GFX11-NEXT:    s_mov_b32 s33, s32
11221; GFX11-NEXT:    s_add_i32 s32, s32, 16
11222; GFX11-NEXT:    s_getpc_b64 s[0:1]
11223; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
11224; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
11225; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
11226; GFX11-NEXT:    s_mov_b32 s4, 1
11227; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
11228; GFX11-NEXT:    s_mov_b32 s5, 2
11229; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
11230; GFX11-NEXT:    s_mov_b32 s6, 3
11231; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
11232; GFX11-NEXT:    s_mov_b32 s7, 4
11233; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
11234; GFX11-NEXT:    s_mov_b32 s8, 5
11235; GFX11-NEXT:    v_writelane_b32 v40, s30, 5
11236; GFX11-NEXT:    v_writelane_b32 v40, s31, 6
11237; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11238; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
11239; GFX11-NEXT:    v_readlane_b32 s31, v40, 6
11240; GFX11-NEXT:    v_readlane_b32 s30, v40, 5
11241; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
11242; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
11243; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
11244; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
11245; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
11246; GFX11-NEXT:    s_add_i32 s32, s32, -16
11247; GFX11-NEXT:    v_readlane_b32 s33, v40, 7
11248; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11249; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
11250; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11251; GFX11-NEXT:    s_waitcnt vmcnt(0)
11252; GFX11-NEXT:    s_setpc_b64 s[30:31]
11253;
11254; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
11255; GFX10-SCRATCH:       ; %bb.0:
11256; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11257; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
11258; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11259; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
11260; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11261; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11262; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 7
11263; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
11264; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
11265; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
11266; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
11267; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
11268; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
11269; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1
11270; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
11271; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2
11272; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
11273; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 3
11274; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
11275; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 4
11276; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
11277; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 5
11278; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 5
11279; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 6
11280; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11281; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 6
11282; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 5
11283; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
11284; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
11285; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
11286; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
11287; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
11288; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
11289; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 7
11290; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11291; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
11292; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11293; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11294; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
11295; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
11296  call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
11297  ret void
11298}
11299
; Calls external_void_func_v8i32_inreg with an <8 x i32> inreg argument whose
; value is loaded at run time: an addrspace(4) pointer is read from an undef
; address and the vector is then loaded through that pointer.
; The expected code for each target saves s4-s11, s30, s31 and s33 into lanes
; of v40, loads the vector into s[4:11], performs the call, and restores the
; saved registers afterwards.
11300define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
11301; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
11302; GFX9:       ; %bb.0:
11303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11304; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11305; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11306; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11307; GFX9-NEXT:    v_writelane_b32 v40, s33, 10
11308; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
11309; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
11310; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
11311; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
11312; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
11313; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
11314; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
11315; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
11316; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
11317; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
11318; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[34:35], 0x0
11319; GFX9-NEXT:    s_mov_b32 s33, s32
11320; GFX9-NEXT:    s_addk_i32 s32, 0x400
11321; GFX9-NEXT:    v_writelane_b32 v40, s30, 8
11322; GFX9-NEXT:    v_writelane_b32 v40, s31, 9
11323; GFX9-NEXT:    s_getpc_b64 s[34:35]
11324; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
11325; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
11326; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11327; GFX9-NEXT:    v_readlane_b32 s31, v40, 9
11328; GFX9-NEXT:    v_readlane_b32 s30, v40, 8
11329; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
11330; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
11331; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
11332; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
11333; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
11334; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
11335; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
11336; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
11337; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
11338; GFX9-NEXT:    v_readlane_b32 s33, v40, 10
11339; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11340; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11341; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11342; GFX9-NEXT:    s_waitcnt vmcnt(0)
11343; GFX9-NEXT:    s_setpc_b64 s[30:31]
11344;
11345; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
11346; GFX10:       ; %bb.0:
11347; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11348; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
11349; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11350; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11351; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11352; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11353; GFX10-NEXT:    v_writelane_b32 v40, s33, 10
11354; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
11355; GFX10-NEXT:    s_mov_b32 s33, s32
11356; GFX10-NEXT:    s_addk_i32 s32, 0x200
11357; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
11358; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
11359; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
11360; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
11361; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
11362; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
11363; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
11364; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
11365; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
11366; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[34:35], 0x0
11367; GFX10-NEXT:    s_getpc_b64 s[34:35]
11368; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
11369; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
11370; GFX10-NEXT:    v_writelane_b32 v40, s30, 8
11371; GFX10-NEXT:    v_writelane_b32 v40, s31, 9
11372; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11373; GFX10-NEXT:    v_readlane_b32 s31, v40, 9
11374; GFX10-NEXT:    v_readlane_b32 s30, v40, 8
11375; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
11376; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
11377; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
11378; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
11379; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
11380; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
11381; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
11382; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
11383; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
11384; GFX10-NEXT:    v_readlane_b32 s33, v40, 10
11385; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11386; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11387; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11388; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11389; GFX10-NEXT:    s_waitcnt vmcnt(0)
11390; GFX10-NEXT:    s_setpc_b64 s[30:31]
11391;
11392; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
11393; GFX11:       ; %bb.0:
11394; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11395; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
11396; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11397; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
11398; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11399; GFX11-NEXT:    v_writelane_b32 v40, s33, 10
11400; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
11401; GFX11-NEXT:    s_mov_b32 s33, s32
11402; GFX11-NEXT:    s_add_i32 s32, s32, 16
11403; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
11404; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
11405; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
11406; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
11407; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
11408; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
11409; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
11410; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
11411; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
11412; GFX11-NEXT:    s_load_b256 s[4:11], s[0:1], 0x0
11413; GFX11-NEXT:    s_getpc_b64 s[0:1]
11414; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
11415; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
11416; GFX11-NEXT:    v_writelane_b32 v40, s30, 8
11417; GFX11-NEXT:    v_writelane_b32 v40, s31, 9
11418; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11419; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
11420; GFX11-NEXT:    v_readlane_b32 s31, v40, 9
11421; GFX11-NEXT:    v_readlane_b32 s30, v40, 8
11422; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
11423; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
11424; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
11425; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
11426; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
11427; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
11428; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
11429; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
11430; GFX11-NEXT:    s_add_i32 s32, s32, -16
11431; GFX11-NEXT:    v_readlane_b32 s33, v40, 10
11432; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11433; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
11434; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11435; GFX11-NEXT:    s_waitcnt vmcnt(0)
11436; GFX11-NEXT:    s_setpc_b64 s[30:31]
11437;
11438; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
11439; GFX10-SCRATCH:       ; %bb.0:
11440; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11441; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
11442; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11443; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
11444; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11445; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11446; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 10
11447; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
11448; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
11449; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
11450; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
11451; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
11452; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
11453; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
11454; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
11455; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
11456; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
11457; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
11458; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
11459; GFX10-SCRATCH-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x0
11460; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
11461; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
11462; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
11463; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 8
11464; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 9
11465; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11466; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 9
11467; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 8
11468; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
11469; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
11470; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
11471; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
11472; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
11473; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
11474; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
11475; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
11476; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
11477; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 10
11478; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11479; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
11480; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11481; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11482; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
11483; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: %ptr is itself loaded from an undef address, then the
; <8 x i32> value read through it is passed as the inreg call argument.
11484  %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef
11485  %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr
11486  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
11487  ret void
11488}
11489
; Calls external_void_func_v8i32_inreg with the constant <8 x i32> inreg
; argument <1, 2, 3, 4, 5, 6, 7, 8>.
; The expected code for each target saves s4-s11, s30, s31 and s33 into lanes
; of v40, materializes the eight constants into s4-s11 with s_mov_b32,
; performs the call, and restores the saved registers afterwards.
11490define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
11491; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
11492; GFX9:       ; %bb.0:
11493; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11494; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11495; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11496; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11497; GFX9-NEXT:    v_writelane_b32 v40, s33, 10
11498; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
11499; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
11500; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
11501; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
11502; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
11503; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
11504; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
11505; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
11506; GFX9-NEXT:    s_mov_b32 s33, s32
11507; GFX9-NEXT:    s_addk_i32 s32, 0x400
11508; GFX9-NEXT:    v_writelane_b32 v40, s30, 8
11509; GFX9-NEXT:    s_mov_b32 s4, 1
11510; GFX9-NEXT:    s_mov_b32 s5, 2
11511; GFX9-NEXT:    s_mov_b32 s6, 3
11512; GFX9-NEXT:    s_mov_b32 s7, 4
11513; GFX9-NEXT:    s_mov_b32 s8, 5
11514; GFX9-NEXT:    s_mov_b32 s9, 6
11515; GFX9-NEXT:    s_mov_b32 s10, 7
11516; GFX9-NEXT:    s_mov_b32 s11, 8
11517; GFX9-NEXT:    v_writelane_b32 v40, s31, 9
11518; GFX9-NEXT:    s_getpc_b64 s[34:35]
11519; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
11520; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
11521; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11522; GFX9-NEXT:    v_readlane_b32 s31, v40, 9
11523; GFX9-NEXT:    v_readlane_b32 s30, v40, 8
11524; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
11525; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
11526; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
11527; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
11528; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
11529; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
11530; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
11531; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
11532; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
11533; GFX9-NEXT:    v_readlane_b32 s33, v40, 10
11534; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11535; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11536; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11537; GFX9-NEXT:    s_waitcnt vmcnt(0)
11538; GFX9-NEXT:    s_setpc_b64 s[30:31]
11539;
11540; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
11541; GFX10:       ; %bb.0:
11542; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11543; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
11544; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11545; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11546; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11547; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11548; GFX10-NEXT:    v_writelane_b32 v40, s33, 10
11549; GFX10-NEXT:    s_mov_b32 s33, s32
11550; GFX10-NEXT:    s_addk_i32 s32, 0x200
11551; GFX10-NEXT:    s_getpc_b64 s[34:35]
11552; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
11553; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
11554; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
11555; GFX10-NEXT:    s_mov_b32 s4, 1
11556; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
11557; GFX10-NEXT:    s_mov_b32 s5, 2
11558; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
11559; GFX10-NEXT:    s_mov_b32 s6, 3
11560; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
11561; GFX10-NEXT:    s_mov_b32 s7, 4
11562; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
11563; GFX10-NEXT:    s_mov_b32 s8, 5
11564; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
11565; GFX10-NEXT:    s_mov_b32 s9, 6
11566; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
11567; GFX10-NEXT:    s_mov_b32 s10, 7
11568; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
11569; GFX10-NEXT:    s_mov_b32 s11, 8
11570; GFX10-NEXT:    v_writelane_b32 v40, s30, 8
11571; GFX10-NEXT:    v_writelane_b32 v40, s31, 9
11572; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11573; GFX10-NEXT:    v_readlane_b32 s31, v40, 9
11574; GFX10-NEXT:    v_readlane_b32 s30, v40, 8
11575; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
11576; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
11577; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
11578; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
11579; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
11580; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
11581; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
11582; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
11583; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
11584; GFX10-NEXT:    v_readlane_b32 s33, v40, 10
11585; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11586; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11587; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11588; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11589; GFX10-NEXT:    s_waitcnt vmcnt(0)
11590; GFX10-NEXT:    s_setpc_b64 s[30:31]
11591;
11592; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg:
11593; GFX11:       ; %bb.0:
11594; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11595; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
11596; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11597; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
11598; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11599; GFX11-NEXT:    v_writelane_b32 v40, s33, 10
11600; GFX11-NEXT:    s_mov_b32 s33, s32
11601; GFX11-NEXT:    s_add_i32 s32, s32, 16
11602; GFX11-NEXT:    s_getpc_b64 s[0:1]
11603; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
11604; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
11605; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
11606; GFX11-NEXT:    s_mov_b32 s4, 1
11607; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
11608; GFX11-NEXT:    s_mov_b32 s5, 2
11609; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
11610; GFX11-NEXT:    s_mov_b32 s6, 3
11611; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
11612; GFX11-NEXT:    s_mov_b32 s7, 4
11613; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
11614; GFX11-NEXT:    s_mov_b32 s8, 5
11615; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
11616; GFX11-NEXT:    s_mov_b32 s9, 6
11617; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
11618; GFX11-NEXT:    s_mov_b32 s10, 7
11619; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
11620; GFX11-NEXT:    s_mov_b32 s11, 8
11621; GFX11-NEXT:    v_writelane_b32 v40, s30, 8
11622; GFX11-NEXT:    v_writelane_b32 v40, s31, 9
11623; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11624; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
11625; GFX11-NEXT:    v_readlane_b32 s31, v40, 9
11626; GFX11-NEXT:    v_readlane_b32 s30, v40, 8
11627; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
11628; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
11629; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
11630; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
11631; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
11632; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
11633; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
11634; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
11635; GFX11-NEXT:    s_add_i32 s32, s32, -16
11636; GFX11-NEXT:    v_readlane_b32 s33, v40, 10
11637; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11638; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
11639; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11640; GFX11-NEXT:    s_waitcnt vmcnt(0)
11641; GFX11-NEXT:    s_setpc_b64 s[30:31]
11642;
11643; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
11644; GFX10-SCRATCH:       ; %bb.0:
11645; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11646; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
11647; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11648; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
11649; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11650; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11651; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 10
11652; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
11653; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
11654; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
11655; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
11656; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
11657; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
11658; GFX10-SCRATCH-NEXT:    s_mov_b32 s4, 1
11659; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
11660; GFX10-SCRATCH-NEXT:    s_mov_b32 s5, 2
11661; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
11662; GFX10-SCRATCH-NEXT:    s_mov_b32 s6, 3
11663; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
11664; GFX10-SCRATCH-NEXT:    s_mov_b32 s7, 4
11665; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
11666; GFX10-SCRATCH-NEXT:    s_mov_b32 s8, 5
11667; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
11668; GFX10-SCRATCH-NEXT:    s_mov_b32 s9, 6
11669; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
11670; GFX10-SCRATCH-NEXT:    s_mov_b32 s10, 7
11671; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
11672; GFX10-SCRATCH-NEXT:    s_mov_b32 s11, 8
11673; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 8
11674; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 9
11675; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11676; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 9
11677; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 8
11678; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
11679; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
11680; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
11681; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
11682; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
11683; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
11684; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
11685; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
11686; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
11687; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 10
11688; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11689; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
11690; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11691; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11692; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
11693; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: the inreg argument is an immediate vector, so no load is
; needed before the call.
11694  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
11695  ret void
11696}
11697
11698define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
11699; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
11700; GFX9:       ; %bb.0:
11701; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11702; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11703; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11704; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11705; GFX9-NEXT:    v_writelane_b32 v40, s33, 18
11706; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
11707; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
11708; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
11709; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
11710; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
11711; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
11712; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
11713; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
11714; GFX9-NEXT:    v_writelane_b32 v40, s12, 8
11715; GFX9-NEXT:    v_writelane_b32 v40, s13, 9
11716; GFX9-NEXT:    v_writelane_b32 v40, s14, 10
11717; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
11718; GFX9-NEXT:    v_writelane_b32 v40, s15, 11
11719; GFX9-NEXT:    v_writelane_b32 v40, s16, 12
11720; GFX9-NEXT:    v_writelane_b32 v40, s17, 13
11721; GFX9-NEXT:    v_writelane_b32 v40, s18, 14
11722; GFX9-NEXT:    v_writelane_b32 v40, s19, 15
11723; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
11724; GFX9-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
11725; GFX9-NEXT:    s_mov_b32 s33, s32
11726; GFX9-NEXT:    s_addk_i32 s32, 0x400
11727; GFX9-NEXT:    v_writelane_b32 v40, s30, 16
11728; GFX9-NEXT:    v_writelane_b32 v40, s31, 17
11729; GFX9-NEXT:    s_getpc_b64 s[34:35]
11730; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
11731; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
11732; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11733; GFX9-NEXT:    v_readlane_b32 s31, v40, 17
11734; GFX9-NEXT:    v_readlane_b32 s30, v40, 16
11735; GFX9-NEXT:    v_readlane_b32 s19, v40, 15
11736; GFX9-NEXT:    v_readlane_b32 s18, v40, 14
11737; GFX9-NEXT:    v_readlane_b32 s17, v40, 13
11738; GFX9-NEXT:    v_readlane_b32 s16, v40, 12
11739; GFX9-NEXT:    v_readlane_b32 s15, v40, 11
11740; GFX9-NEXT:    v_readlane_b32 s14, v40, 10
11741; GFX9-NEXT:    v_readlane_b32 s13, v40, 9
11742; GFX9-NEXT:    v_readlane_b32 s12, v40, 8
11743; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
11744; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
11745; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
11746; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
11747; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
11748; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
11749; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
11750; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
11751; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
11752; GFX9-NEXT:    v_readlane_b32 s33, v40, 18
11753; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11754; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11755; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11756; GFX9-NEXT:    s_waitcnt vmcnt(0)
11757; GFX9-NEXT:    s_setpc_b64 s[30:31]
11758;
11759; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
11760; GFX10:       ; %bb.0:
11761; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11762; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
11763; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11764; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11765; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11766; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11767; GFX10-NEXT:    v_writelane_b32 v40, s33, 18
11768; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
11769; GFX10-NEXT:    s_mov_b32 s33, s32
11770; GFX10-NEXT:    s_addk_i32 s32, 0x200
11771; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
11772; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
11773; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
11774; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
11775; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
11776; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
11777; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
11778; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
11779; GFX10-NEXT:    v_writelane_b32 v40, s12, 8
11780; GFX10-NEXT:    v_writelane_b32 v40, s13, 9
11781; GFX10-NEXT:    v_writelane_b32 v40, s14, 10
11782; GFX10-NEXT:    v_writelane_b32 v40, s15, 11
11783; GFX10-NEXT:    v_writelane_b32 v40, s16, 12
11784; GFX10-NEXT:    v_writelane_b32 v40, s17, 13
11785; GFX10-NEXT:    v_writelane_b32 v40, s18, 14
11786; GFX10-NEXT:    v_writelane_b32 v40, s19, 15
11787; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
11788; GFX10-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
11789; GFX10-NEXT:    s_getpc_b64 s[34:35]
11790; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
11791; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
11792; GFX10-NEXT:    v_writelane_b32 v40, s30, 16
11793; GFX10-NEXT:    v_writelane_b32 v40, s31, 17
11794; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
11795; GFX10-NEXT:    v_readlane_b32 s31, v40, 17
11796; GFX10-NEXT:    v_readlane_b32 s30, v40, 16
11797; GFX10-NEXT:    v_readlane_b32 s19, v40, 15
11798; GFX10-NEXT:    v_readlane_b32 s18, v40, 14
11799; GFX10-NEXT:    v_readlane_b32 s17, v40, 13
11800; GFX10-NEXT:    v_readlane_b32 s16, v40, 12
11801; GFX10-NEXT:    v_readlane_b32 s15, v40, 11
11802; GFX10-NEXT:    v_readlane_b32 s14, v40, 10
11803; GFX10-NEXT:    v_readlane_b32 s13, v40, 9
11804; GFX10-NEXT:    v_readlane_b32 s12, v40, 8
11805; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
11806; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
11807; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
11808; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
11809; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
11810; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
11811; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
11812; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
11813; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
11814; GFX10-NEXT:    v_readlane_b32 s33, v40, 18
11815; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
11816; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
11817; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
11818; GFX10-NEXT:    s_mov_b32 exec_lo, s34
11819; GFX10-NEXT:    s_waitcnt vmcnt(0)
11820; GFX10-NEXT:    s_setpc_b64 s[30:31]
11821;
11822; GFX11-LABEL: test_call_external_void_func_v16i32_inreg:
11823; GFX11:       ; %bb.0:
11824; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11825; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
11826; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11827; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
11828; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11829; GFX11-NEXT:    v_writelane_b32 v40, s33, 18
11830; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
11831; GFX11-NEXT:    s_mov_b32 s33, s32
11832; GFX11-NEXT:    s_add_i32 s32, s32, 16
11833; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
11834; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
11835; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
11836; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
11837; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
11838; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
11839; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
11840; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
11841; GFX11-NEXT:    v_writelane_b32 v40, s12, 8
11842; GFX11-NEXT:    v_writelane_b32 v40, s13, 9
11843; GFX11-NEXT:    v_writelane_b32 v40, s14, 10
11844; GFX11-NEXT:    v_writelane_b32 v40, s15, 11
11845; GFX11-NEXT:    v_writelane_b32 v40, s16, 12
11846; GFX11-NEXT:    v_writelane_b32 v40, s17, 13
11847; GFX11-NEXT:    v_writelane_b32 v40, s18, 14
11848; GFX11-NEXT:    v_writelane_b32 v40, s19, 15
11849; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
11850; GFX11-NEXT:    s_load_b512 s[4:19], s[0:1], 0x0
11851; GFX11-NEXT:    s_getpc_b64 s[0:1]
11852; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
11853; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
11854; GFX11-NEXT:    v_writelane_b32 v40, s30, 16
11855; GFX11-NEXT:    v_writelane_b32 v40, s31, 17
11856; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11857; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
11858; GFX11-NEXT:    v_readlane_b32 s31, v40, 17
11859; GFX11-NEXT:    v_readlane_b32 s30, v40, 16
11860; GFX11-NEXT:    v_readlane_b32 s19, v40, 15
11861; GFX11-NEXT:    v_readlane_b32 s18, v40, 14
11862; GFX11-NEXT:    v_readlane_b32 s17, v40, 13
11863; GFX11-NEXT:    v_readlane_b32 s16, v40, 12
11864; GFX11-NEXT:    v_readlane_b32 s15, v40, 11
11865; GFX11-NEXT:    v_readlane_b32 s14, v40, 10
11866; GFX11-NEXT:    v_readlane_b32 s13, v40, 9
11867; GFX11-NEXT:    v_readlane_b32 s12, v40, 8
11868; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
11869; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
11870; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
11871; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
11872; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
11873; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
11874; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
11875; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
11876; GFX11-NEXT:    s_add_i32 s32, s32, -16
11877; GFX11-NEXT:    v_readlane_b32 s33, v40, 18
11878; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
11879; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
11880; GFX11-NEXT:    s_mov_b32 exec_lo, s0
11881; GFX11-NEXT:    s_waitcnt vmcnt(0)
11882; GFX11-NEXT:    s_setpc_b64 s[30:31]
11883;
11884; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
11885; GFX10-SCRATCH:       ; %bb.0:
11886; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11887; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
11888; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11889; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
11890; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11891; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11892; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 18
11893; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
11894; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
11895; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
11896; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
11897; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
11898; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
11899; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
11900; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
11901; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
11902; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
11903; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
11904; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s12, 8
11905; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s13, 9
11906; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s14, 10
11907; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s15, 11
11908; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s16, 12
11909; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s17, 13
11910; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s18, 14
11911; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s19, 15
11912; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
11913; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x0
11914; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
11915; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
11916; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
11917; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 16
11918; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 17
11919; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
11920; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 17
11921; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 16
11922; GFX10-SCRATCH-NEXT:    v_readlane_b32 s19, v40, 15
11923; GFX10-SCRATCH-NEXT:    v_readlane_b32 s18, v40, 14
11924; GFX10-SCRATCH-NEXT:    v_readlane_b32 s17, v40, 13
11925; GFX10-SCRATCH-NEXT:    v_readlane_b32 s16, v40, 12
11926; GFX10-SCRATCH-NEXT:    v_readlane_b32 s15, v40, 11
11927; GFX10-SCRATCH-NEXT:    v_readlane_b32 s14, v40, 10
11928; GFX10-SCRATCH-NEXT:    v_readlane_b32 s13, v40, 9
11929; GFX10-SCRATCH-NEXT:    v_readlane_b32 s12, v40, 8
11930; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
11931; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
11932; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
11933; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
11934; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
11935; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
11936; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
11937; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
11938; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
11939; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 18
11940; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
11941; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
11942; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
11943; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
11944; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
11945; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
11946  %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef
11947  %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr
11948  call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
11949  ret void
11950}
11951
; Calls external_void_func_v32i32_inreg with a <32 x i32> argument marked inreg.
; The vector is loaded through a constant-address-space (addrspace(4)) pointer
; that is itself loaded from an undef address.
;
; In the expected output below, for every RUN configuration:
;   * elements 0-15 are loaded directly into s[4:19] and elements 16-25 are
;     copied from s36-s45 into s20-s29, so 26 elements travel in s4-s29;
;   * the remaining 6 elements (s46-s51) are moved to VGPRs and stored to the
;     outgoing stack area at s32 + 0..20;
;   * s4-s31 and s33 are saved to lanes 0-28 of v40 before the call and read
;     back afterwards, with v40 itself spilled/reloaded around the function.
;
; The check lines are autogenerated (see the NOTE at the top of the file,
; utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
11952define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
11953; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
11954; GFX9:       ; %bb.0:
11955; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11956; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
11957; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
11958; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
11959; GFX9-NEXT:    v_writelane_b32 v40, s33, 28
11960; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
11961; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
11962; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
11963; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
11964; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
11965; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
11966; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
11967; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
11968; GFX9-NEXT:    v_writelane_b32 v40, s12, 8
11969; GFX9-NEXT:    v_writelane_b32 v40, s13, 9
11970; GFX9-NEXT:    v_writelane_b32 v40, s14, 10
11971; GFX9-NEXT:    v_writelane_b32 v40, s15, 11
11972; GFX9-NEXT:    v_writelane_b32 v40, s16, 12
11973; GFX9-NEXT:    v_writelane_b32 v40, s17, 13
11974; GFX9-NEXT:    v_writelane_b32 v40, s18, 14
11975; GFX9-NEXT:    v_writelane_b32 v40, s19, 15
11976; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
11977; GFX9-NEXT:    v_writelane_b32 v40, s20, 16
11978; GFX9-NEXT:    v_writelane_b32 v40, s21, 17
11979; GFX9-NEXT:    v_writelane_b32 v40, s22, 18
11980; GFX9-NEXT:    v_writelane_b32 v40, s23, 19
11981; GFX9-NEXT:    v_writelane_b32 v40, s24, 20
11982; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
11983; GFX9-NEXT:    s_load_dwordx16 s[36:51], s[34:35], 0x40
11984; GFX9-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
11985; GFX9-NEXT:    v_writelane_b32 v40, s25, 21
11986; GFX9-NEXT:    v_writelane_b32 v40, s26, 22
11987; GFX9-NEXT:    v_writelane_b32 v40, s27, 23
11988; GFX9-NEXT:    s_mov_b32 s33, s32
11989; GFX9-NEXT:    s_addk_i32 s32, 0x400
11990; GFX9-NEXT:    v_writelane_b32 v40, s28, 24
11991; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
11992; GFX9-NEXT:    v_mov_b32_e32 v0, s46
11993; GFX9-NEXT:    v_writelane_b32 v40, s29, 25
11994; GFX9-NEXT:    v_mov_b32_e32 v1, s47
11995; GFX9-NEXT:    v_mov_b32_e32 v2, s48
11996; GFX9-NEXT:    v_mov_b32_e32 v3, s49
11997; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
11998; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
11999; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
12000; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12
12001; GFX9-NEXT:    v_mov_b32_e32 v0, s50
12002; GFX9-NEXT:    v_writelane_b32 v40, s30, 26
12003; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
12004; GFX9-NEXT:    v_mov_b32_e32 v0, s51
12005; GFX9-NEXT:    s_mov_b32 s20, s36
12006; GFX9-NEXT:    s_mov_b32 s21, s37
12007; GFX9-NEXT:    s_mov_b32 s22, s38
12008; GFX9-NEXT:    s_mov_b32 s23, s39
12009; GFX9-NEXT:    s_mov_b32 s24, s40
12010; GFX9-NEXT:    s_mov_b32 s25, s41
12011; GFX9-NEXT:    s_mov_b32 s26, s42
12012; GFX9-NEXT:    s_mov_b32 s27, s43
12013; GFX9-NEXT:    s_mov_b32 s28, s44
12014; GFX9-NEXT:    s_mov_b32 s29, s45
12015; GFX9-NEXT:    v_writelane_b32 v40, s31, 27
12016; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
12017; GFX9-NEXT:    s_getpc_b64 s[34:35]
12018; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
12019; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
12020; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12021; GFX9-NEXT:    v_readlane_b32 s31, v40, 27
12022; GFX9-NEXT:    v_readlane_b32 s30, v40, 26
12023; GFX9-NEXT:    v_readlane_b32 s29, v40, 25
12024; GFX9-NEXT:    v_readlane_b32 s28, v40, 24
12025; GFX9-NEXT:    v_readlane_b32 s27, v40, 23
12026; GFX9-NEXT:    v_readlane_b32 s26, v40, 22
12027; GFX9-NEXT:    v_readlane_b32 s25, v40, 21
12028; GFX9-NEXT:    v_readlane_b32 s24, v40, 20
12029; GFX9-NEXT:    v_readlane_b32 s23, v40, 19
12030; GFX9-NEXT:    v_readlane_b32 s22, v40, 18
12031; GFX9-NEXT:    v_readlane_b32 s21, v40, 17
12032; GFX9-NEXT:    v_readlane_b32 s20, v40, 16
12033; GFX9-NEXT:    v_readlane_b32 s19, v40, 15
12034; GFX9-NEXT:    v_readlane_b32 s18, v40, 14
12035; GFX9-NEXT:    v_readlane_b32 s17, v40, 13
12036; GFX9-NEXT:    v_readlane_b32 s16, v40, 12
12037; GFX9-NEXT:    v_readlane_b32 s15, v40, 11
12038; GFX9-NEXT:    v_readlane_b32 s14, v40, 10
12039; GFX9-NEXT:    v_readlane_b32 s13, v40, 9
12040; GFX9-NEXT:    v_readlane_b32 s12, v40, 8
12041; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
12042; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
12043; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
12044; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
12045; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
12046; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
12047; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
12048; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
12049; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
12050; GFX9-NEXT:    v_readlane_b32 s33, v40, 28
12051; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12052; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
12053; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12054; GFX9-NEXT:    s_waitcnt vmcnt(0)
12055; GFX9-NEXT:    s_setpc_b64 s[30:31]
12056;
12057; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
12058; GFX10:       ; %bb.0:
12059; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12060; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
12061; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12062; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
12063; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12064; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12065; GFX10-NEXT:    v_writelane_b32 v40, s33, 28
12066; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
12067; GFX10-NEXT:    s_mov_b32 s33, s32
12068; GFX10-NEXT:    s_addk_i32 s32, 0x200
12069; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
12070; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
12071; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
12072; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
12073; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
12074; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
12075; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
12076; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
12077; GFX10-NEXT:    v_writelane_b32 v40, s12, 8
12078; GFX10-NEXT:    v_writelane_b32 v40, s13, 9
12079; GFX10-NEXT:    v_writelane_b32 v40, s14, 10
12080; GFX10-NEXT:    v_writelane_b32 v40, s15, 11
12081; GFX10-NEXT:    v_writelane_b32 v40, s16, 12
12082; GFX10-NEXT:    v_writelane_b32 v40, s17, 13
12083; GFX10-NEXT:    v_writelane_b32 v40, s18, 14
12084; GFX10-NEXT:    v_writelane_b32 v40, s19, 15
12085; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
12086; GFX10-NEXT:    s_clause 0x1
12087; GFX10-NEXT:    s_load_dwordx16 s[36:51], s[34:35], 0x40
12088; GFX10-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
12089; GFX10-NEXT:    s_getpc_b64 s[34:35]
12090; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
12091; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
12092; GFX10-NEXT:    v_writelane_b32 v40, s20, 16
12093; GFX10-NEXT:    v_writelane_b32 v40, s21, 17
12094; GFX10-NEXT:    v_writelane_b32 v40, s22, 18
12095; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
12096; GFX10-NEXT:    v_mov_b32_e32 v0, s46
12097; GFX10-NEXT:    v_writelane_b32 v40, s23, 19
12098; GFX10-NEXT:    v_mov_b32_e32 v1, s47
12099; GFX10-NEXT:    v_mov_b32_e32 v2, s48
12100; GFX10-NEXT:    v_mov_b32_e32 v3, s49
12101; GFX10-NEXT:    s_mov_b32 s20, s36
12102; GFX10-NEXT:    v_writelane_b32 v40, s24, 20
12103; GFX10-NEXT:    s_mov_b32 s21, s37
12104; GFX10-NEXT:    s_mov_b32 s22, s38
12105; GFX10-NEXT:    s_mov_b32 s23, s39
12106; GFX10-NEXT:    s_mov_b32 s24, s40
12107; GFX10-NEXT:    v_writelane_b32 v40, s25, 21
12108; GFX10-NEXT:    s_mov_b32 s25, s41
12109; GFX10-NEXT:    v_mov_b32_e32 v4, s50
12110; GFX10-NEXT:    v_mov_b32_e32 v5, s51
12111; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
12112; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
12113; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
12114; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12
12115; GFX10-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16
12116; GFX10-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:20
12117; GFX10-NEXT:    v_writelane_b32 v40, s26, 22
12118; GFX10-NEXT:    s_mov_b32 s26, s42
12119; GFX10-NEXT:    v_writelane_b32 v40, s27, 23
12120; GFX10-NEXT:    s_mov_b32 s27, s43
12121; GFX10-NEXT:    v_writelane_b32 v40, s28, 24
12122; GFX10-NEXT:    s_mov_b32 s28, s44
12123; GFX10-NEXT:    v_writelane_b32 v40, s29, 25
12124; GFX10-NEXT:    s_mov_b32 s29, s45
12125; GFX10-NEXT:    v_writelane_b32 v40, s30, 26
12126; GFX10-NEXT:    v_writelane_b32 v40, s31, 27
12127; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12128; GFX10-NEXT:    v_readlane_b32 s31, v40, 27
12129; GFX10-NEXT:    v_readlane_b32 s30, v40, 26
12130; GFX10-NEXT:    v_readlane_b32 s29, v40, 25
12131; GFX10-NEXT:    v_readlane_b32 s28, v40, 24
12132; GFX10-NEXT:    v_readlane_b32 s27, v40, 23
12133; GFX10-NEXT:    v_readlane_b32 s26, v40, 22
12134; GFX10-NEXT:    v_readlane_b32 s25, v40, 21
12135; GFX10-NEXT:    v_readlane_b32 s24, v40, 20
12136; GFX10-NEXT:    v_readlane_b32 s23, v40, 19
12137; GFX10-NEXT:    v_readlane_b32 s22, v40, 18
12138; GFX10-NEXT:    v_readlane_b32 s21, v40, 17
12139; GFX10-NEXT:    v_readlane_b32 s20, v40, 16
12140; GFX10-NEXT:    v_readlane_b32 s19, v40, 15
12141; GFX10-NEXT:    v_readlane_b32 s18, v40, 14
12142; GFX10-NEXT:    v_readlane_b32 s17, v40, 13
12143; GFX10-NEXT:    v_readlane_b32 s16, v40, 12
12144; GFX10-NEXT:    v_readlane_b32 s15, v40, 11
12145; GFX10-NEXT:    v_readlane_b32 s14, v40, 10
12146; GFX10-NEXT:    v_readlane_b32 s13, v40, 9
12147; GFX10-NEXT:    v_readlane_b32 s12, v40, 8
12148; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
12149; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
12150; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
12151; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
12152; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
12153; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
12154; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
12155; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
12156; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
12157; GFX10-NEXT:    v_readlane_b32 s33, v40, 28
12158; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12159; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
12160; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12161; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12162; GFX10-NEXT:    s_waitcnt vmcnt(0)
12163; GFX10-NEXT:    s_setpc_b64 s[30:31]
12164;
12165; GFX11-LABEL: test_call_external_void_func_v32i32_inreg:
12166; GFX11:       ; %bb.0:
12167; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12168; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
12169; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12170; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
12171; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12172; GFX11-NEXT:    v_writelane_b32 v40, s33, 28
12173; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
12174; GFX11-NEXT:    s_mov_b32 s33, s32
12175; GFX11-NEXT:    s_add_i32 s32, s32, 16
12176; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
12177; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
12178; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
12179; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
12180; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
12181; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
12182; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
12183; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
12184; GFX11-NEXT:    v_writelane_b32 v40, s12, 8
12185; GFX11-NEXT:    v_writelane_b32 v40, s13, 9
12186; GFX11-NEXT:    v_writelane_b32 v40, s14, 10
12187; GFX11-NEXT:    v_writelane_b32 v40, s15, 11
12188; GFX11-NEXT:    v_writelane_b32 v40, s16, 12
12189; GFX11-NEXT:    v_writelane_b32 v40, s17, 13
12190; GFX11-NEXT:    v_writelane_b32 v40, s18, 14
12191; GFX11-NEXT:    v_writelane_b32 v40, s19, 15
12192; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
12193; GFX11-NEXT:    s_clause 0x1
12194; GFX11-NEXT:    s_load_b512 s[36:51], s[0:1], 0x40
12195; GFX11-NEXT:    s_load_b512 s[4:19], s[0:1], 0x0
12196; GFX11-NEXT:    s_getpc_b64 s[0:1]
12197; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
12198; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
12199; GFX11-NEXT:    v_writelane_b32 v40, s20, 16
12200; GFX11-NEXT:    v_writelane_b32 v40, s21, 17
12201; GFX11-NEXT:    v_writelane_b32 v40, s22, 18
12202; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
12203; GFX11-NEXT:    v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51
12204; GFX11-NEXT:    v_writelane_b32 v40, s23, 19
12205; GFX11-NEXT:    v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47
12206; GFX11-NEXT:    v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49
12207; GFX11-NEXT:    v_writelane_b32 v40, s24, 20
12208; GFX11-NEXT:    s_mov_b32 s20, s36
12209; GFX11-NEXT:    s_mov_b32 s21, s37
12210; GFX11-NEXT:    s_mov_b32 s22, s38
12211; GFX11-NEXT:    s_mov_b32 s23, s39
12212; GFX11-NEXT:    v_writelane_b32 v40, s25, 21
12213; GFX11-NEXT:    s_mov_b32 s24, s40
12214; GFX11-NEXT:    s_mov_b32 s25, s41
12215; GFX11-NEXT:    s_clause 0x1
12216; GFX11-NEXT:    scratch_store_b64 off, v[4:5], s32 offset:16
12217; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
12218; GFX11-NEXT:    v_writelane_b32 v40, s26, 22
12219; GFX11-NEXT:    s_mov_b32 s26, s42
12220; GFX11-NEXT:    v_writelane_b32 v40, s27, 23
12221; GFX11-NEXT:    s_mov_b32 s27, s43
12222; GFX11-NEXT:    v_writelane_b32 v40, s28, 24
12223; GFX11-NEXT:    s_mov_b32 s28, s44
12224; GFX11-NEXT:    v_writelane_b32 v40, s29, 25
12225; GFX11-NEXT:    s_mov_b32 s29, s45
12226; GFX11-NEXT:    v_writelane_b32 v40, s30, 26
12227; GFX11-NEXT:    v_writelane_b32 v40, s31, 27
12228; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12229; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
12230; GFX11-NEXT:    v_readlane_b32 s31, v40, 27
12231; GFX11-NEXT:    v_readlane_b32 s30, v40, 26
12232; GFX11-NEXT:    v_readlane_b32 s29, v40, 25
12233; GFX11-NEXT:    v_readlane_b32 s28, v40, 24
12234; GFX11-NEXT:    v_readlane_b32 s27, v40, 23
12235; GFX11-NEXT:    v_readlane_b32 s26, v40, 22
12236; GFX11-NEXT:    v_readlane_b32 s25, v40, 21
12237; GFX11-NEXT:    v_readlane_b32 s24, v40, 20
12238; GFX11-NEXT:    v_readlane_b32 s23, v40, 19
12239; GFX11-NEXT:    v_readlane_b32 s22, v40, 18
12240; GFX11-NEXT:    v_readlane_b32 s21, v40, 17
12241; GFX11-NEXT:    v_readlane_b32 s20, v40, 16
12242; GFX11-NEXT:    v_readlane_b32 s19, v40, 15
12243; GFX11-NEXT:    v_readlane_b32 s18, v40, 14
12244; GFX11-NEXT:    v_readlane_b32 s17, v40, 13
12245; GFX11-NEXT:    v_readlane_b32 s16, v40, 12
12246; GFX11-NEXT:    v_readlane_b32 s15, v40, 11
12247; GFX11-NEXT:    v_readlane_b32 s14, v40, 10
12248; GFX11-NEXT:    v_readlane_b32 s13, v40, 9
12249; GFX11-NEXT:    v_readlane_b32 s12, v40, 8
12250; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
12251; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
12252; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
12253; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
12254; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
12255; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
12256; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
12257; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
12258; GFX11-NEXT:    s_add_i32 s32, s32, -16
12259; GFX11-NEXT:    v_readlane_b32 s33, v40, 28
12260; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12261; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
12262; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12263; GFX11-NEXT:    s_waitcnt vmcnt(0)
12264; GFX11-NEXT:    s_setpc_b64 s[30:31]
12265;
12266; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
12267; GFX10-SCRATCH:       ; %bb.0:
12268; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12269; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
12270; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12271; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
12272; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12273; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12274; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 28
12275; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
12276; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
12277; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
12278; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
12279; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
12280; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
12281; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
12282; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
12283; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
12284; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
12285; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
12286; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s12, 8
12287; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s13, 9
12288; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s14, 10
12289; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s15, 11
12290; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s16, 12
12291; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s17, 13
12292; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s18, 14
12293; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s19, 15
12294; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
12295; GFX10-SCRATCH-NEXT:    s_clause 0x1
12296; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x40
12297; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x0
12298; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
12299; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
12300; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
12301; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s20, 16
12302; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s21, 17
12303; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s22, 18
12304; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
12305; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, s50
12306; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s23, 19
12307; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, s51
12308; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, s46
12309; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, s47
12310; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, s48
12311; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s24, 20
12312; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, s49
12313; GFX10-SCRATCH-NEXT:    s_mov_b32 s20, s36
12314; GFX10-SCRATCH-NEXT:    s_mov_b32 s21, s37
12315; GFX10-SCRATCH-NEXT:    s_mov_b32 s22, s38
12316; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s25, 21
12317; GFX10-SCRATCH-NEXT:    s_mov_b32 s23, s39
12318; GFX10-SCRATCH-NEXT:    s_mov_b32 s24, s40
12319; GFX10-SCRATCH-NEXT:    s_mov_b32 s25, s41
12320; GFX10-SCRATCH-NEXT:    scratch_store_dwordx2 off, v[4:5], s32 offset:16
12321; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
12322; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s26, 22
12323; GFX10-SCRATCH-NEXT:    s_mov_b32 s26, s42
12324; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s27, 23
12325; GFX10-SCRATCH-NEXT:    s_mov_b32 s27, s43
12326; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s28, 24
12327; GFX10-SCRATCH-NEXT:    s_mov_b32 s28, s44
12328; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s29, 25
12329; GFX10-SCRATCH-NEXT:    s_mov_b32 s29, s45
12330; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 26
12331; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 27
12332; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12333; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 27
12334; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 26
12335; GFX10-SCRATCH-NEXT:    v_readlane_b32 s29, v40, 25
12336; GFX10-SCRATCH-NEXT:    v_readlane_b32 s28, v40, 24
12337; GFX10-SCRATCH-NEXT:    v_readlane_b32 s27, v40, 23
12338; GFX10-SCRATCH-NEXT:    v_readlane_b32 s26, v40, 22
12339; GFX10-SCRATCH-NEXT:    v_readlane_b32 s25, v40, 21
12340; GFX10-SCRATCH-NEXT:    v_readlane_b32 s24, v40, 20
12341; GFX10-SCRATCH-NEXT:    v_readlane_b32 s23, v40, 19
12342; GFX10-SCRATCH-NEXT:    v_readlane_b32 s22, v40, 18
12343; GFX10-SCRATCH-NEXT:    v_readlane_b32 s21, v40, 17
12344; GFX10-SCRATCH-NEXT:    v_readlane_b32 s20, v40, 16
12345; GFX10-SCRATCH-NEXT:    v_readlane_b32 s19, v40, 15
12346; GFX10-SCRATCH-NEXT:    v_readlane_b32 s18, v40, 14
12347; GFX10-SCRATCH-NEXT:    v_readlane_b32 s17, v40, 13
12348; GFX10-SCRATCH-NEXT:    v_readlane_b32 s16, v40, 12
12349; GFX10-SCRATCH-NEXT:    v_readlane_b32 s15, v40, 11
12350; GFX10-SCRATCH-NEXT:    v_readlane_b32 s14, v40, 10
12351; GFX10-SCRATCH-NEXT:    v_readlane_b32 s13, v40, 9
12352; GFX10-SCRATCH-NEXT:    v_readlane_b32 s12, v40, 8
12353; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
12354; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
12355; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
12356; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
12357; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
12358; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
12359; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
12360; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
12361; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
12362; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 28
12363; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12364; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
12365; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12366; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12367; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
12368; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test: fetch the vector's address from an undef pointer, load the
; 32-element vector from it, and pass the vector as a single inreg argument.
12369  %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
12370  %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr
12371  call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
12372  ret void
12373}
12374
12375define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
12376; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
12377; GFX9:       ; %bb.0:
12378; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12379; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12380; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
12381; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12382; GFX9-NEXT:    v_writelane_b32 v40, s33, 28
12383; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
12384; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
12385; GFX9-NEXT:    v_writelane_b32 v40, s6, 2
12386; GFX9-NEXT:    v_writelane_b32 v40, s7, 3
12387; GFX9-NEXT:    v_writelane_b32 v40, s8, 4
12388; GFX9-NEXT:    v_writelane_b32 v40, s9, 5
12389; GFX9-NEXT:    v_writelane_b32 v40, s10, 6
12390; GFX9-NEXT:    v_writelane_b32 v40, s11, 7
12391; GFX9-NEXT:    v_writelane_b32 v40, s12, 8
12392; GFX9-NEXT:    v_writelane_b32 v40, s13, 9
12393; GFX9-NEXT:    v_writelane_b32 v40, s14, 10
12394; GFX9-NEXT:    v_writelane_b32 v40, s15, 11
12395; GFX9-NEXT:    v_writelane_b32 v40, s16, 12
12396; GFX9-NEXT:    v_writelane_b32 v40, s17, 13
12397; GFX9-NEXT:    v_writelane_b32 v40, s18, 14
12398; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
12399; GFX9-NEXT:    v_writelane_b32 v40, s19, 15
12400; GFX9-NEXT:    v_writelane_b32 v40, s20, 16
12401; GFX9-NEXT:    v_writelane_b32 v40, s21, 17
12402; GFX9-NEXT:    v_writelane_b32 v40, s22, 18
12403; GFX9-NEXT:    v_writelane_b32 v40, s23, 19
12404; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
12405; GFX9-NEXT:    s_load_dword s52, s[34:35], 0x0
12406; GFX9-NEXT:    ; kill: killed $sgpr34_sgpr35
12407; GFX9-NEXT:    ; kill: killed $sgpr34_sgpr35
12408; GFX9-NEXT:    s_load_dwordx16 s[36:51], s[34:35], 0x40
12409; GFX9-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
12410; GFX9-NEXT:    v_writelane_b32 v40, s24, 20
12411; GFX9-NEXT:    v_writelane_b32 v40, s25, 21
12412; GFX9-NEXT:    s_mov_b32 s33, s32
12413; GFX9-NEXT:    s_addk_i32 s32, 0x400
12414; GFX9-NEXT:    v_writelane_b32 v40, s26, 22
12415; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
12416; GFX9-NEXT:    v_mov_b32_e32 v0, s52
12417; GFX9-NEXT:    v_writelane_b32 v40, s27, 23
12418; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
12419; GFX9-NEXT:    v_mov_b32_e32 v0, s46
12420; GFX9-NEXT:    v_writelane_b32 v40, s28, 24
12421; GFX9-NEXT:    v_mov_b32_e32 v1, s47
12422; GFX9-NEXT:    v_mov_b32_e32 v2, s48
12423; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
12424; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
12425; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
12426; GFX9-NEXT:    v_mov_b32_e32 v0, s49
12427; GFX9-NEXT:    v_writelane_b32 v40, s29, 25
12428; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
12429; GFX9-NEXT:    v_mov_b32_e32 v0, s50
12430; GFX9-NEXT:    v_writelane_b32 v40, s30, 26
12431; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
12432; GFX9-NEXT:    v_mov_b32_e32 v0, s51
12433; GFX9-NEXT:    s_mov_b32 s20, s36
12434; GFX9-NEXT:    s_mov_b32 s21, s37
12435; GFX9-NEXT:    s_mov_b32 s22, s38
12436; GFX9-NEXT:    s_mov_b32 s23, s39
12437; GFX9-NEXT:    s_mov_b32 s24, s40
12438; GFX9-NEXT:    s_mov_b32 s25, s41
12439; GFX9-NEXT:    s_mov_b32 s26, s42
12440; GFX9-NEXT:    s_mov_b32 s27, s43
12441; GFX9-NEXT:    s_mov_b32 s28, s44
12442; GFX9-NEXT:    s_mov_b32 s29, s45
12443; GFX9-NEXT:    v_writelane_b32 v40, s31, 27
12444; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
12445; GFX9-NEXT:    s_getpc_b64 s[34:35]
12446; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
12447; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
12448; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12449; GFX9-NEXT:    v_readlane_b32 s31, v40, 27
12450; GFX9-NEXT:    v_readlane_b32 s30, v40, 26
12451; GFX9-NEXT:    v_readlane_b32 s29, v40, 25
12452; GFX9-NEXT:    v_readlane_b32 s28, v40, 24
12453; GFX9-NEXT:    v_readlane_b32 s27, v40, 23
12454; GFX9-NEXT:    v_readlane_b32 s26, v40, 22
12455; GFX9-NEXT:    v_readlane_b32 s25, v40, 21
12456; GFX9-NEXT:    v_readlane_b32 s24, v40, 20
12457; GFX9-NEXT:    v_readlane_b32 s23, v40, 19
12458; GFX9-NEXT:    v_readlane_b32 s22, v40, 18
12459; GFX9-NEXT:    v_readlane_b32 s21, v40, 17
12460; GFX9-NEXT:    v_readlane_b32 s20, v40, 16
12461; GFX9-NEXT:    v_readlane_b32 s19, v40, 15
12462; GFX9-NEXT:    v_readlane_b32 s18, v40, 14
12463; GFX9-NEXT:    v_readlane_b32 s17, v40, 13
12464; GFX9-NEXT:    v_readlane_b32 s16, v40, 12
12465; GFX9-NEXT:    v_readlane_b32 s15, v40, 11
12466; GFX9-NEXT:    v_readlane_b32 s14, v40, 10
12467; GFX9-NEXT:    v_readlane_b32 s13, v40, 9
12468; GFX9-NEXT:    v_readlane_b32 s12, v40, 8
12469; GFX9-NEXT:    v_readlane_b32 s11, v40, 7
12470; GFX9-NEXT:    v_readlane_b32 s10, v40, 6
12471; GFX9-NEXT:    v_readlane_b32 s9, v40, 5
12472; GFX9-NEXT:    v_readlane_b32 s8, v40, 4
12473; GFX9-NEXT:    v_readlane_b32 s7, v40, 3
12474; GFX9-NEXT:    v_readlane_b32 s6, v40, 2
12475; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
12476; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
12477; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
12478; GFX9-NEXT:    v_readlane_b32 s33, v40, 28
12479; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12480; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
12481; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12482; GFX9-NEXT:    s_waitcnt vmcnt(0)
12483; GFX9-NEXT:    s_setpc_b64 s[30:31]
12484;
12485; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
12486; GFX10:       ; %bb.0:
12487; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12488; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
12489; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12490; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
12491; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12492; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12493; GFX10-NEXT:    v_writelane_b32 v40, s33, 28
12494; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
12495; GFX10-NEXT:    s_mov_b32 s33, s32
12496; GFX10-NEXT:    s_addk_i32 s32, 0x200
12497; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
12498; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
12499; GFX10-NEXT:    v_writelane_b32 v40, s6, 2
12500; GFX10-NEXT:    v_writelane_b32 v40, s7, 3
12501; GFX10-NEXT:    v_writelane_b32 v40, s8, 4
12502; GFX10-NEXT:    v_writelane_b32 v40, s9, 5
12503; GFX10-NEXT:    v_writelane_b32 v40, s10, 6
12504; GFX10-NEXT:    v_writelane_b32 v40, s11, 7
12505; GFX10-NEXT:    v_writelane_b32 v40, s12, 8
12506; GFX10-NEXT:    v_writelane_b32 v40, s13, 9
12507; GFX10-NEXT:    v_writelane_b32 v40, s14, 10
12508; GFX10-NEXT:    v_writelane_b32 v40, s15, 11
12509; GFX10-NEXT:    v_writelane_b32 v40, s16, 12
12510; GFX10-NEXT:    v_writelane_b32 v40, s17, 13
12511; GFX10-NEXT:    v_writelane_b32 v40, s18, 14
12512; GFX10-NEXT:    v_writelane_b32 v40, s19, 15
12513; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
12514; GFX10-NEXT:    s_clause 0x2
12515; GFX10-NEXT:    s_load_dword s52, s[34:35], 0x0
12516; GFX10-NEXT:    ; meta instruction
12517; GFX10-NEXT:    ; meta instruction
12518; GFX10-NEXT:    s_load_dwordx16 s[36:51], s[34:35], 0x40
12519; GFX10-NEXT:    s_load_dwordx16 s[4:19], s[34:35], 0x0
12520; GFX10-NEXT:    s_getpc_b64 s[34:35]
12521; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
12522; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
12523; GFX10-NEXT:    v_writelane_b32 v40, s20, 16
12524; GFX10-NEXT:    v_writelane_b32 v40, s21, 17
12525; GFX10-NEXT:    v_writelane_b32 v40, s22, 18
12526; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
12527; GFX10-NEXT:    v_mov_b32_e32 v0, s52
12528; GFX10-NEXT:    v_mov_b32_e32 v1, s47
12529; GFX10-NEXT:    v_writelane_b32 v40, s23, 19
12530; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
12531; GFX10-NEXT:    v_mov_b32_e32 v0, s46
12532; GFX10-NEXT:    v_mov_b32_e32 v2, s48
12533; GFX10-NEXT:    v_mov_b32_e32 v3, s49
12534; GFX10-NEXT:    v_writelane_b32 v40, s24, 20
12535; GFX10-NEXT:    s_mov_b32 s20, s36
12536; GFX10-NEXT:    s_mov_b32 s21, s37
12537; GFX10-NEXT:    s_mov_b32 s22, s38
12538; GFX10-NEXT:    s_mov_b32 s23, s39
12539; GFX10-NEXT:    v_writelane_b32 v40, s25, 21
12540; GFX10-NEXT:    s_mov_b32 s24, s40
12541; GFX10-NEXT:    s_mov_b32 s25, s41
12542; GFX10-NEXT:    v_mov_b32_e32 v4, s50
12543; GFX10-NEXT:    v_mov_b32_e32 v5, s51
12544; GFX10-NEXT:    v_writelane_b32 v40, s26, 22
12545; GFX10-NEXT:    s_mov_b32 s26, s42
12546; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
12547; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
12548; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
12549; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12
12550; GFX10-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16
12551; GFX10-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:20
12552; GFX10-NEXT:    v_writelane_b32 v40, s27, 23
12553; GFX10-NEXT:    s_mov_b32 s27, s43
12554; GFX10-NEXT:    v_writelane_b32 v40, s28, 24
12555; GFX10-NEXT:    s_mov_b32 s28, s44
12556; GFX10-NEXT:    v_writelane_b32 v40, s29, 25
12557; GFX10-NEXT:    s_mov_b32 s29, s45
12558; GFX10-NEXT:    v_writelane_b32 v40, s30, 26
12559; GFX10-NEXT:    v_writelane_b32 v40, s31, 27
12560; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12561; GFX10-NEXT:    v_readlane_b32 s31, v40, 27
12562; GFX10-NEXT:    v_readlane_b32 s30, v40, 26
12563; GFX10-NEXT:    v_readlane_b32 s29, v40, 25
12564; GFX10-NEXT:    v_readlane_b32 s28, v40, 24
12565; GFX10-NEXT:    v_readlane_b32 s27, v40, 23
12566; GFX10-NEXT:    v_readlane_b32 s26, v40, 22
12567; GFX10-NEXT:    v_readlane_b32 s25, v40, 21
12568; GFX10-NEXT:    v_readlane_b32 s24, v40, 20
12569; GFX10-NEXT:    v_readlane_b32 s23, v40, 19
12570; GFX10-NEXT:    v_readlane_b32 s22, v40, 18
12571; GFX10-NEXT:    v_readlane_b32 s21, v40, 17
12572; GFX10-NEXT:    v_readlane_b32 s20, v40, 16
12573; GFX10-NEXT:    v_readlane_b32 s19, v40, 15
12574; GFX10-NEXT:    v_readlane_b32 s18, v40, 14
12575; GFX10-NEXT:    v_readlane_b32 s17, v40, 13
12576; GFX10-NEXT:    v_readlane_b32 s16, v40, 12
12577; GFX10-NEXT:    v_readlane_b32 s15, v40, 11
12578; GFX10-NEXT:    v_readlane_b32 s14, v40, 10
12579; GFX10-NEXT:    v_readlane_b32 s13, v40, 9
12580; GFX10-NEXT:    v_readlane_b32 s12, v40, 8
12581; GFX10-NEXT:    v_readlane_b32 s11, v40, 7
12582; GFX10-NEXT:    v_readlane_b32 s10, v40, 6
12583; GFX10-NEXT:    v_readlane_b32 s9, v40, 5
12584; GFX10-NEXT:    v_readlane_b32 s8, v40, 4
12585; GFX10-NEXT:    v_readlane_b32 s7, v40, 3
12586; GFX10-NEXT:    v_readlane_b32 s6, v40, 2
12587; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
12588; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
12589; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
12590; GFX10-NEXT:    v_readlane_b32 s33, v40, 28
12591; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12592; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
12593; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12594; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12595; GFX10-NEXT:    s_waitcnt vmcnt(0)
12596; GFX10-NEXT:    s_setpc_b64 s[30:31]
12597;
12598; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg:
12599; GFX11:       ; %bb.0:
12600; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12601; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
12602; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12603; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
12604; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12605; GFX11-NEXT:    v_writelane_b32 v40, s33, 28
12606; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
12607; GFX11-NEXT:    s_mov_b32 s33, s32
12608; GFX11-NEXT:    s_add_i32 s32, s32, 16
12609; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
12610; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
12611; GFX11-NEXT:    v_writelane_b32 v40, s6, 2
12612; GFX11-NEXT:    v_writelane_b32 v40, s7, 3
12613; GFX11-NEXT:    v_writelane_b32 v40, s8, 4
12614; GFX11-NEXT:    v_writelane_b32 v40, s9, 5
12615; GFX11-NEXT:    v_writelane_b32 v40, s10, 6
12616; GFX11-NEXT:    v_writelane_b32 v40, s11, 7
12617; GFX11-NEXT:    v_writelane_b32 v40, s12, 8
12618; GFX11-NEXT:    v_writelane_b32 v40, s13, 9
12619; GFX11-NEXT:    v_writelane_b32 v40, s14, 10
12620; GFX11-NEXT:    v_writelane_b32 v40, s15, 11
12621; GFX11-NEXT:    v_writelane_b32 v40, s16, 12
12622; GFX11-NEXT:    v_writelane_b32 v40, s17, 13
12623; GFX11-NEXT:    v_writelane_b32 v40, s18, 14
12624; GFX11-NEXT:    v_writelane_b32 v40, s19, 15
12625; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
12626; GFX11-NEXT:    s_clause 0x2
12627; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x0
12628; GFX11-NEXT:    s_load_b512 s[36:51], s[0:1], 0x40
12629; GFX11-NEXT:    s_load_b512 s[4:19], s[0:1], 0x0
12630; GFX11-NEXT:    s_getpc_b64 s[0:1]
12631; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
12632; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
12633; GFX11-NEXT:    v_writelane_b32 v40, s20, 16
12634; GFX11-NEXT:    v_writelane_b32 v40, s21, 17
12635; GFX11-NEXT:    v_writelane_b32 v40, s22, 18
12636; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
12637; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51
12638; GFX11-NEXT:    v_writelane_b32 v40, s23, 19
12639; GFX11-NEXT:    v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47
12640; GFX11-NEXT:    v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49
12641; GFX11-NEXT:    v_writelane_b32 v40, s24, 20
12642; GFX11-NEXT:    v_mov_b32_e32 v2, s48
12643; GFX11-NEXT:    s_mov_b32 s20, s36
12644; GFX11-NEXT:    s_mov_b32 s21, s37
12645; GFX11-NEXT:    s_mov_b32 s22, s38
12646; GFX11-NEXT:    v_writelane_b32 v40, s25, 21
12647; GFX11-NEXT:    s_mov_b32 s23, s39
12648; GFX11-NEXT:    s_mov_b32 s24, s40
12649; GFX11-NEXT:    s_mov_b32 s25, s41
12650; GFX11-NEXT:    s_clause 0x2
12651; GFX11-NEXT:    scratch_store_b32 off, v6, s32 offset:24
12652; GFX11-NEXT:    scratch_store_b64 off, v[4:5], s32 offset:16
12653; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
12654; GFX11-NEXT:    v_writelane_b32 v40, s26, 22
12655; GFX11-NEXT:    s_mov_b32 s26, s42
12656; GFX11-NEXT:    v_writelane_b32 v40, s27, 23
12657; GFX11-NEXT:    s_mov_b32 s27, s43
12658; GFX11-NEXT:    v_writelane_b32 v40, s28, 24
12659; GFX11-NEXT:    s_mov_b32 s28, s44
12660; GFX11-NEXT:    v_writelane_b32 v40, s29, 25
12661; GFX11-NEXT:    s_mov_b32 s29, s45
12662; GFX11-NEXT:    v_writelane_b32 v40, s30, 26
12663; GFX11-NEXT:    v_writelane_b32 v40, s31, 27
12664; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12665; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
12666; GFX11-NEXT:    v_readlane_b32 s31, v40, 27
12667; GFX11-NEXT:    v_readlane_b32 s30, v40, 26
12668; GFX11-NEXT:    v_readlane_b32 s29, v40, 25
12669; GFX11-NEXT:    v_readlane_b32 s28, v40, 24
12670; GFX11-NEXT:    v_readlane_b32 s27, v40, 23
12671; GFX11-NEXT:    v_readlane_b32 s26, v40, 22
12672; GFX11-NEXT:    v_readlane_b32 s25, v40, 21
12673; GFX11-NEXT:    v_readlane_b32 s24, v40, 20
12674; GFX11-NEXT:    v_readlane_b32 s23, v40, 19
12675; GFX11-NEXT:    v_readlane_b32 s22, v40, 18
12676; GFX11-NEXT:    v_readlane_b32 s21, v40, 17
12677; GFX11-NEXT:    v_readlane_b32 s20, v40, 16
12678; GFX11-NEXT:    v_readlane_b32 s19, v40, 15
12679; GFX11-NEXT:    v_readlane_b32 s18, v40, 14
12680; GFX11-NEXT:    v_readlane_b32 s17, v40, 13
12681; GFX11-NEXT:    v_readlane_b32 s16, v40, 12
12682; GFX11-NEXT:    v_readlane_b32 s15, v40, 11
12683; GFX11-NEXT:    v_readlane_b32 s14, v40, 10
12684; GFX11-NEXT:    v_readlane_b32 s13, v40, 9
12685; GFX11-NEXT:    v_readlane_b32 s12, v40, 8
12686; GFX11-NEXT:    v_readlane_b32 s11, v40, 7
12687; GFX11-NEXT:    v_readlane_b32 s10, v40, 6
12688; GFX11-NEXT:    v_readlane_b32 s9, v40, 5
12689; GFX11-NEXT:    v_readlane_b32 s8, v40, 4
12690; GFX11-NEXT:    v_readlane_b32 s7, v40, 3
12691; GFX11-NEXT:    v_readlane_b32 s6, v40, 2
12692; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
12693; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
12694; GFX11-NEXT:    s_add_i32 s32, s32, -16
12695; GFX11-NEXT:    v_readlane_b32 s33, v40, 28
12696; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12697; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
12698; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12699; GFX11-NEXT:    s_waitcnt vmcnt(0)
12700; GFX11-NEXT:    s_setpc_b64 s[30:31]
12701;
12702; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
12703; GFX10-SCRATCH:       ; %bb.0:
12704; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12705; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
12706; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12707; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
12708; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12709; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12710; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 28
12711; GFX10-SCRATCH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
12712; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
12713; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
12714; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s4, 0
12715; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s5, 1
12716; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s6, 2
12717; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s7, 3
12718; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s8, 4
12719; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s9, 5
12720; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s10, 6
12721; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s11, 7
12722; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s12, 8
12723; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s13, 9
12724; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s14, 10
12725; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s15, 11
12726; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s16, 12
12727; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s17, 13
12728; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s18, 14
12729; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s19, 15
12730; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
12731; GFX10-SCRATCH-NEXT:    s_clause 0x2
12732; GFX10-SCRATCH-NEXT:    s_load_dword s2, s[0:1], 0x0
12733; GFX10-SCRATCH-NEXT:    ; meta instruction
12734; GFX10-SCRATCH-NEXT:    ; meta instruction
12735; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x40
12736; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x0
12737; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
12738; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
12739; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
12740; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s20, 16
12741; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s21, 17
12742; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s22, 18
12743; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
12744; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, s2
12745; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, s50
12746; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s23, 19
12747; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, s51
12748; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, s46
12749; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, s47
12750; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, s48
12751; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s24, 20
12752; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, s49
12753; GFX10-SCRATCH-NEXT:    s_mov_b32 s20, s36
12754; GFX10-SCRATCH-NEXT:    s_mov_b32 s21, s37
12755; GFX10-SCRATCH-NEXT:    s_mov_b32 s22, s38
12756; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s25, 21
12757; GFX10-SCRATCH-NEXT:    s_mov_b32 s23, s39
12758; GFX10-SCRATCH-NEXT:    s_mov_b32 s24, s40
12759; GFX10-SCRATCH-NEXT:    s_mov_b32 s25, s41
12760; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v6, s32 offset:24
12761; GFX10-SCRATCH-NEXT:    scratch_store_dwordx2 off, v[4:5], s32 offset:16
12762; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
12763; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s26, 22
12764; GFX10-SCRATCH-NEXT:    s_mov_b32 s26, s42
12765; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s27, 23
12766; GFX10-SCRATCH-NEXT:    s_mov_b32 s27, s43
12767; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s28, 24
12768; GFX10-SCRATCH-NEXT:    s_mov_b32 s28, s44
12769; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s29, 25
12770; GFX10-SCRATCH-NEXT:    s_mov_b32 s29, s45
12771; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 26
12772; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 27
12773; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12774; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 27
12775; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 26
12776; GFX10-SCRATCH-NEXT:    v_readlane_b32 s29, v40, 25
12777; GFX10-SCRATCH-NEXT:    v_readlane_b32 s28, v40, 24
12778; GFX10-SCRATCH-NEXT:    v_readlane_b32 s27, v40, 23
12779; GFX10-SCRATCH-NEXT:    v_readlane_b32 s26, v40, 22
12780; GFX10-SCRATCH-NEXT:    v_readlane_b32 s25, v40, 21
12781; GFX10-SCRATCH-NEXT:    v_readlane_b32 s24, v40, 20
12782; GFX10-SCRATCH-NEXT:    v_readlane_b32 s23, v40, 19
12783; GFX10-SCRATCH-NEXT:    v_readlane_b32 s22, v40, 18
12784; GFX10-SCRATCH-NEXT:    v_readlane_b32 s21, v40, 17
12785; GFX10-SCRATCH-NEXT:    v_readlane_b32 s20, v40, 16
12786; GFX10-SCRATCH-NEXT:    v_readlane_b32 s19, v40, 15
12787; GFX10-SCRATCH-NEXT:    v_readlane_b32 s18, v40, 14
12788; GFX10-SCRATCH-NEXT:    v_readlane_b32 s17, v40, 13
12789; GFX10-SCRATCH-NEXT:    v_readlane_b32 s16, v40, 12
12790; GFX10-SCRATCH-NEXT:    v_readlane_b32 s15, v40, 11
12791; GFX10-SCRATCH-NEXT:    v_readlane_b32 s14, v40, 10
12792; GFX10-SCRATCH-NEXT:    v_readlane_b32 s13, v40, 9
12793; GFX10-SCRATCH-NEXT:    v_readlane_b32 s12, v40, 8
12794; GFX10-SCRATCH-NEXT:    v_readlane_b32 s11, v40, 7
12795; GFX10-SCRATCH-NEXT:    v_readlane_b32 s10, v40, 6
12796; GFX10-SCRATCH-NEXT:    v_readlane_b32 s9, v40, 5
12797; GFX10-SCRATCH-NEXT:    v_readlane_b32 s8, v40, 4
12798; GFX10-SCRATCH-NEXT:    v_readlane_b32 s7, v40, 3
12799; GFX10-SCRATCH-NEXT:    v_readlane_b32 s6, v40, 2
12800; GFX10-SCRATCH-NEXT:    v_readlane_b32 s5, v40, 1
12801; GFX10-SCRATCH-NEXT:    v_readlane_b32 s4, v40, 0
12802; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
12803; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 28
12804; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12805; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
12806; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12807; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12808; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
12809; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
12810  %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
12811  %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0
12812  %val1 = load i32, i32 addrspace(4)* undef
12813  call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
12814  ret void
12815}
12816
; Forwards a <32 x i32> and a double, unchanged, to @stack_passed_f64_arg.
; In the checked output below the double is the only argument copied through
; memory: it is read from the incoming area at s33+0 and rewritten to the
; outgoing area at s32+0 (two dword accesses on the GFX9/GFX10 buffer paths, a
; single 64-bit scratch access on GFX11 and GFX10-SCRATCH). The v40 spill slot
; is placed at offset 8, directly after those 8 bytes.
; NOTE(review): judging by the test name this pins the alignment of the
; stack-passed f64 that follows a 32-dword vector - confirm against the commit
; that introduced it.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
12817define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
12818; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
12819; GFX9:       ; %bb.0: ; %entry
12820; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12821; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12822; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
12823; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12824; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
12825; GFX9-NEXT:    s_mov_b32 s33, s32
12826; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s33
12827; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:4
12828; GFX9-NEXT:    s_addk_i32 s32, 0x400
12829; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
12830; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
12831; GFX9-NEXT:    s_getpc_b64 s[34:35]
12832; GFX9-NEXT:    s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
12833; GFX9-NEXT:    s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
12834; GFX9-NEXT:    s_waitcnt vmcnt(1)
12835; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32
12836; GFX9-NEXT:    s_waitcnt vmcnt(1)
12837; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:4
12838; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12839; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
12840; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
12841; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
12842; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
12843; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12844; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
12845; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12846; GFX9-NEXT:    s_waitcnt vmcnt(0)
12847; GFX9-NEXT:    s_setpc_b64 s[30:31]
12848;
12849; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
12850; GFX10:       ; %bb.0: ; %entry
12851; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12852; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
12853; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12854; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
12855; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12856; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12857; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
12858; GFX10-NEXT:    s_mov_b32 s33, s32
12859; GFX10-NEXT:    s_clause 0x1
12860; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s33
12861; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:4
12862; GFX10-NEXT:    s_addk_i32 s32, 0x200
12863; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
12864; GFX10-NEXT:    s_getpc_b64 s[34:35]
12865; GFX10-NEXT:    s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
12866; GFX10-NEXT:    s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
12867; GFX10-NEXT:    s_waitcnt vmcnt(1)
12868; GFX10-NEXT:    buffer_store_dword v32, off, s[0:3], s32
12869; GFX10-NEXT:    s_waitcnt vmcnt(0)
12870; GFX10-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:4
12871; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
12872; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
12873; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
12874; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
12875; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
12876; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
12877; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
12878; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
12879; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
12880; GFX10-NEXT:    s_mov_b32 exec_lo, s34
12881; GFX10-NEXT:    s_waitcnt vmcnt(0)
12882; GFX10-NEXT:    s_setpc_b64 s[30:31]
12883;
12884; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
12885; GFX11:       ; %bb.0: ; %entry
12886; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12887; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
12888; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12889; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill
12890; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12891; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
12892; GFX11-NEXT:    s_mov_b32 s33, s32
12893; GFX11-NEXT:    s_add_i32 s32, s32, 16
12894; GFX11-NEXT:    scratch_load_b64 v[32:33], off, s33
12895; GFX11-NEXT:    s_getpc_b64 s[0:1]
12896; GFX11-NEXT:    s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
12897; GFX11-NEXT:    s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
12898; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
12899; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
12900; GFX11-NEXT:    s_waitcnt vmcnt(0)
12901; GFX11-NEXT:    scratch_store_b64 off, v[32:33], s32
12902; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12903; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
12904; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
12905; GFX11-NEXT:    s_add_i32 s32, s32, -16
12906; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
12907; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
12908; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload
12909; GFX11-NEXT:    s_mov_b32 exec_lo, s0
12910; GFX11-NEXT:    s_waitcnt vmcnt(0)
12911; GFX11-NEXT:    s_setpc_b64 s[30:31]
12912;
12913; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
12914; GFX10-SCRATCH:       ; %bb.0: ; %entry
12915; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12916; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
12917; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12918; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill
12919; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12920; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12921; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
12922; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
12923; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
12924; GFX10-SCRATCH-NEXT:    scratch_load_dwordx2 v[32:33], off, s33
12925; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
12926; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
12927; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
12928; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
12929; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
12930; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
12931; GFX10-SCRATCH-NEXT:    scratch_store_dwordx2 off, v[32:33], s32
12932; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
12933; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
12934; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
12935; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
12936; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
12937; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
12938; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
12939; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
12940; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
12941; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
12942; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
12943entry:
  ; Pass both incoming arguments straight through to the callee.
12944  call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
12945  ret void
12946}
12947
12948define amdgpu_gfx void @stack_12xv3i32() #0 {
12949; GFX9-LABEL: stack_12xv3i32:
12950; GFX9:       ; %bb.0: ; %entry
12951; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12952; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
12953; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
12954; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
12955; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
12956; GFX9-NEXT:    s_mov_b32 s33, s32
12957; GFX9-NEXT:    s_addk_i32 s32, 0x400
12958; GFX9-NEXT:    v_mov_b32_e32 v0, 12
12959; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
12960; GFX9-NEXT:    v_mov_b32_e32 v0, 13
12961; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
12962; GFX9-NEXT:    v_mov_b32_e32 v0, 14
12963; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
12964; GFX9-NEXT:    v_mov_b32_e32 v0, 15
12965; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
12966; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
12967; GFX9-NEXT:    v_mov_b32_e32 v0, 0
12968; GFX9-NEXT:    v_mov_b32_e32 v1, 0
12969; GFX9-NEXT:    v_mov_b32_e32 v2, 0
12970; GFX9-NEXT:    v_mov_b32_e32 v3, 1
12971; GFX9-NEXT:    v_mov_b32_e32 v4, 1
12972; GFX9-NEXT:    v_mov_b32_e32 v5, 1
12973; GFX9-NEXT:    v_mov_b32_e32 v6, 2
12974; GFX9-NEXT:    v_mov_b32_e32 v7, 2
12975; GFX9-NEXT:    v_mov_b32_e32 v8, 2
12976; GFX9-NEXT:    v_mov_b32_e32 v9, 3
12977; GFX9-NEXT:    v_mov_b32_e32 v10, 3
12978; GFX9-NEXT:    v_mov_b32_e32 v11, 3
12979; GFX9-NEXT:    v_mov_b32_e32 v12, 4
12980; GFX9-NEXT:    v_mov_b32_e32 v13, 4
12981; GFX9-NEXT:    v_mov_b32_e32 v14, 4
12982; GFX9-NEXT:    v_mov_b32_e32 v15, 5
12983; GFX9-NEXT:    v_mov_b32_e32 v16, 5
12984; GFX9-NEXT:    v_mov_b32_e32 v17, 5
12985; GFX9-NEXT:    v_mov_b32_e32 v18, 6
12986; GFX9-NEXT:    v_mov_b32_e32 v19, 6
12987; GFX9-NEXT:    v_mov_b32_e32 v20, 6
12988; GFX9-NEXT:    v_mov_b32_e32 v21, 7
12989; GFX9-NEXT:    v_mov_b32_e32 v22, 7
12990; GFX9-NEXT:    v_mov_b32_e32 v23, 7
12991; GFX9-NEXT:    v_mov_b32_e32 v24, 8
12992; GFX9-NEXT:    v_mov_b32_e32 v25, 8
12993; GFX9-NEXT:    v_mov_b32_e32 v26, 8
12994; GFX9-NEXT:    v_mov_b32_e32 v27, 9
12995; GFX9-NEXT:    v_mov_b32_e32 v28, 9
12996; GFX9-NEXT:    v_mov_b32_e32 v29, 9
12997; GFX9-NEXT:    v_mov_b32_e32 v30, 10
12998; GFX9-NEXT:    v_mov_b32_e32 v31, 11
12999; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
13000; GFX9-NEXT:    s_getpc_b64 s[34:35]
13001; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4
13002; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12
13003; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13004; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
13005; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
13006; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
13007; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
13008; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
13009; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13010; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
13011; GFX9-NEXT:    s_waitcnt vmcnt(0)
13012; GFX9-NEXT:    s_setpc_b64 s[30:31]
13013;
13014; GFX10-LABEL: stack_12xv3i32:
13015; GFX10:       ; %bb.0: ; %entry
13016; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13017; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
13018; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13019; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13020; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13021; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13022; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
13023; GFX10-NEXT:    v_mov_b32_e32 v0, 12
13024; GFX10-NEXT:    v_mov_b32_e32 v1, 13
13025; GFX10-NEXT:    v_mov_b32_e32 v2, 14
13026; GFX10-NEXT:    s_mov_b32 s33, s32
13027; GFX10-NEXT:    s_addk_i32 s32, 0x200
13028; GFX10-NEXT:    v_mov_b32_e32 v3, 15
13029; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
13030; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
13031; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
13032; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
13033; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12
13034; GFX10-NEXT:    v_mov_b32_e32 v0, 0
13035; GFX10-NEXT:    v_mov_b32_e32 v1, 0
13036; GFX10-NEXT:    v_mov_b32_e32 v2, 0
13037; GFX10-NEXT:    v_mov_b32_e32 v3, 1
13038; GFX10-NEXT:    v_mov_b32_e32 v4, 1
13039; GFX10-NEXT:    v_mov_b32_e32 v5, 1
13040; GFX10-NEXT:    v_mov_b32_e32 v6, 2
13041; GFX10-NEXT:    v_mov_b32_e32 v7, 2
13042; GFX10-NEXT:    v_mov_b32_e32 v8, 2
13043; GFX10-NEXT:    v_mov_b32_e32 v9, 3
13044; GFX10-NEXT:    v_mov_b32_e32 v10, 3
13045; GFX10-NEXT:    v_mov_b32_e32 v11, 3
13046; GFX10-NEXT:    v_mov_b32_e32 v12, 4
13047; GFX10-NEXT:    v_mov_b32_e32 v13, 4
13048; GFX10-NEXT:    v_mov_b32_e32 v14, 4
13049; GFX10-NEXT:    v_mov_b32_e32 v15, 5
13050; GFX10-NEXT:    v_mov_b32_e32 v16, 5
13051; GFX10-NEXT:    v_mov_b32_e32 v17, 5
13052; GFX10-NEXT:    v_mov_b32_e32 v18, 6
13053; GFX10-NEXT:    v_mov_b32_e32 v19, 6
13054; GFX10-NEXT:    v_mov_b32_e32 v20, 6
13055; GFX10-NEXT:    v_mov_b32_e32 v21, 7
13056; GFX10-NEXT:    v_mov_b32_e32 v22, 7
13057; GFX10-NEXT:    v_mov_b32_e32 v23, 7
13058; GFX10-NEXT:    v_mov_b32_e32 v24, 8
13059; GFX10-NEXT:    v_mov_b32_e32 v25, 8
13060; GFX10-NEXT:    v_mov_b32_e32 v26, 8
13061; GFX10-NEXT:    v_mov_b32_e32 v27, 9
13062; GFX10-NEXT:    v_mov_b32_e32 v28, 9
13063; GFX10-NEXT:    v_mov_b32_e32 v29, 9
13064; GFX10-NEXT:    v_mov_b32_e32 v30, 10
13065; GFX10-NEXT:    v_mov_b32_e32 v31, 11
13066; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
13067; GFX10-NEXT:    s_getpc_b64 s[34:35]
13068; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4
13069; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12
13070; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13071; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
13072; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
13073; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
13074; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
13075; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13076; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13077; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13078; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13079; GFX10-NEXT:    s_waitcnt vmcnt(0)
13080; GFX10-NEXT:    s_setpc_b64 s[30:31]
13081;
13082; GFX11-LABEL: stack_12xv3i32:
13083; GFX11:       ; %bb.0: ; %entry
13084; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13085; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
13086; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13087; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
13088; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13089; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
13090; GFX11-NEXT:    v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13
13091; GFX11-NEXT:    v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15
13092; GFX11-NEXT:    s_mov_b32 s33, s32
13093; GFX11-NEXT:    s_add_i32 s32, s32, 16
13094; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
13095; GFX11-NEXT:    v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1
13096; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
13097; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
13098; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1
13099; GFX11-NEXT:    v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2
13100; GFX11-NEXT:    v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3
13101; GFX11-NEXT:    v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3
13102; GFX11-NEXT:    v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4
13103; GFX11-NEXT:    v_dual_mov_b32 v14, 4 :: v_dual_mov_b32 v15, 5
13104; GFX11-NEXT:    v_dual_mov_b32 v16, 5 :: v_dual_mov_b32 v17, 5
13105; GFX11-NEXT:    v_dual_mov_b32 v18, 6 :: v_dual_mov_b32 v19, 6
13106; GFX11-NEXT:    v_dual_mov_b32 v20, 6 :: v_dual_mov_b32 v21, 7
13107; GFX11-NEXT:    v_dual_mov_b32 v22, 7 :: v_dual_mov_b32 v23, 7
13108; GFX11-NEXT:    v_dual_mov_b32 v24, 8 :: v_dual_mov_b32 v25, 8
13109; GFX11-NEXT:    v_dual_mov_b32 v26, 8 :: v_dual_mov_b32 v27, 9
13110; GFX11-NEXT:    v_dual_mov_b32 v28, 9 :: v_dual_mov_b32 v29, 9
13111; GFX11-NEXT:    v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11
13112; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
13113; GFX11-NEXT:    s_getpc_b64 s[0:1]
13114; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
13115; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
13116; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
13117; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13118; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
13119; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
13120; GFX11-NEXT:    s_add_i32 s32, s32, -16
13121; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
13122; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13123; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
13124; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13125; GFX11-NEXT:    s_waitcnt vmcnt(0)
13126; GFX11-NEXT:    s_setpc_b64 s[30:31]
13127;
13128; GFX10-SCRATCH-LABEL: stack_12xv3i32:
13129; GFX10-SCRATCH:       ; %bb.0: ; %entry
13130; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13131; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
13132; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13133; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
13134; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13135; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13136; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
13137; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 12
13138; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 13
13139; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 14
13140; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 15
13141; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
13142; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
13143; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
13144; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 1
13145; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
13146; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
13147; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
13148; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
13149; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 1
13150; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 1
13151; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 2
13152; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 2
13153; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v8, 2
13154; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v9, 3
13155; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v10, 3
13156; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v11, 3
13157; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v12, 4
13158; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v13, 4
13159; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v14, 4
13160; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v15, 5
13161; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v16, 5
13162; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v17, 5
13163; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v18, 6
13164; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v19, 6
13165; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v20, 6
13166; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v21, 7
13167; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v22, 7
13168; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v23, 7
13169; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v24, 8
13170; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v25, 8
13171; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v26, 8
13172; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v27, 9
13173; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v28, 9
13174; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v29, 9
13175; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v30, 10
13176; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v31, 11
13177; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
13178; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
13179; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
13180; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
13181; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13182; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
13183; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
13184; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
13185; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
13186; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13187; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
13188; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13189; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13190; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
13191; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
13192entry:
13193  call amdgpu_gfx void @external_void_func_12xv3i32(
13194      <3 x i32><i32 0, i32 0, i32 0>,
13195      <3 x i32><i32 1, i32 1, i32 1>,
13196      <3 x i32><i32 2, i32 2, i32 2>,
13197      <3 x i32><i32 3, i32 3, i32 3>,
13198      <3 x i32><i32 4, i32 4, i32 4>,
13199      <3 x i32><i32 5, i32 5, i32 5>,
13200      <3 x i32><i32 6, i32 6, i32 6>,
13201      <3 x i32><i32 7, i32 7, i32 7>,
13202      <3 x i32><i32 8, i32 8, i32 8>,
13203      <3 x i32><i32 9, i32 9, i32 9>,
13204      <3 x i32><i32 10, i32 11, i32 12>,
13205      <3 x i32><i32 13, i32 14, i32 15>)
13206  ret void
13207}
13208
; stack_8xv5i32: calls external_void_func_8xv5i32 with eight constant
; <5 x i32> arguments, i.e. 40 i32 elements in total.
;
; What the expected output below shows for every RUN configuration:
;   * elements 0..31 (five each of 0, 1, 2, 3, 4 and 5, then 6 and 7) are
;     materialized in v0..v31;
;   * the remaining eight elements (8..15) are written to memory at s32 + 0
;     through s32 + 28 before the call. The gfx900 and plain gfx1010 runs do
;     this with eight single-dword buffer stores; the gfx1100 and gfx1010
;     flat-scratch runs do it with two 128-bit scratch stores;
;   * s33, s30 and s31 are kept in lanes 2, 0 and 1 of v40 across the call,
;     and v40 itself is spilled in the prologue and reloaded in the epilogue.
;
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of editing them
; by hand.
13209define amdgpu_gfx void @stack_8xv5i32() #0 {
13210; GFX9-LABEL: stack_8xv5i32:
13211; GFX9:       ; %bb.0: ; %entry
13212; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13213; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
13214; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13215; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
13216; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
13217; GFX9-NEXT:    s_mov_b32 s33, s32
13218; GFX9-NEXT:    s_addk_i32 s32, 0x400
13219; GFX9-NEXT:    v_mov_b32_e32 v0, 8
13220; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
13221; GFX9-NEXT:    v_mov_b32_e32 v0, 9
13222; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
13223; GFX9-NEXT:    v_mov_b32_e32 v0, 10
13224; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
13225; GFX9-NEXT:    v_mov_b32_e32 v0, 11
13226; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
13227; GFX9-NEXT:    v_mov_b32_e32 v0, 12
13228; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
13229; GFX9-NEXT:    v_mov_b32_e32 v0, 13
13230; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
13231; GFX9-NEXT:    v_mov_b32_e32 v0, 14
13232; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
13233; GFX9-NEXT:    v_mov_b32_e32 v0, 15
13234; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
13235; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
13236; GFX9-NEXT:    v_mov_b32_e32 v0, 0
13237; GFX9-NEXT:    v_mov_b32_e32 v1, 0
13238; GFX9-NEXT:    v_mov_b32_e32 v2, 0
13239; GFX9-NEXT:    v_mov_b32_e32 v3, 0
13240; GFX9-NEXT:    v_mov_b32_e32 v4, 0
13241; GFX9-NEXT:    v_mov_b32_e32 v5, 1
13242; GFX9-NEXT:    v_mov_b32_e32 v6, 1
13243; GFX9-NEXT:    v_mov_b32_e32 v7, 1
13244; GFX9-NEXT:    v_mov_b32_e32 v8, 1
13245; GFX9-NEXT:    v_mov_b32_e32 v9, 1
13246; GFX9-NEXT:    v_mov_b32_e32 v10, 2
13247; GFX9-NEXT:    v_mov_b32_e32 v11, 2
13248; GFX9-NEXT:    v_mov_b32_e32 v12, 2
13249; GFX9-NEXT:    v_mov_b32_e32 v13, 2
13250; GFX9-NEXT:    v_mov_b32_e32 v14, 2
13251; GFX9-NEXT:    v_mov_b32_e32 v15, 3
13252; GFX9-NEXT:    v_mov_b32_e32 v16, 3
13253; GFX9-NEXT:    v_mov_b32_e32 v17, 3
13254; GFX9-NEXT:    v_mov_b32_e32 v18, 3
13255; GFX9-NEXT:    v_mov_b32_e32 v19, 3
13256; GFX9-NEXT:    v_mov_b32_e32 v20, 4
13257; GFX9-NEXT:    v_mov_b32_e32 v21, 4
13258; GFX9-NEXT:    v_mov_b32_e32 v22, 4
13259; GFX9-NEXT:    v_mov_b32_e32 v23, 4
13260; GFX9-NEXT:    v_mov_b32_e32 v24, 4
13261; GFX9-NEXT:    v_mov_b32_e32 v25, 5
13262; GFX9-NEXT:    v_mov_b32_e32 v26, 5
13263; GFX9-NEXT:    v_mov_b32_e32 v27, 5
13264; GFX9-NEXT:    v_mov_b32_e32 v28, 5
13265; GFX9-NEXT:    v_mov_b32_e32 v29, 5
13266; GFX9-NEXT:    v_mov_b32_e32 v30, 6
13267; GFX9-NEXT:    v_mov_b32_e32 v31, 7
13268; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
13269; GFX9-NEXT:    s_getpc_b64 s[34:35]
13270; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4
13271; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12
13272; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13273; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
13274; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
13275; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
13276; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
13277; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
13278; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13279; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
13280; GFX9-NEXT:    s_waitcnt vmcnt(0)
13281; GFX9-NEXT:    s_setpc_b64 s[30:31]
13282;
13283; GFX10-LABEL: stack_8xv5i32:
13284; GFX10:       ; %bb.0: ; %entry
13285; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13286; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
13287; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13288; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13289; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13290; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13291; GFX10-NEXT:    v_mov_b32_e32 v0, 8
13292; GFX10-NEXT:    v_mov_b32_e32 v1, 9
13293; GFX10-NEXT:    v_mov_b32_e32 v2, 10
13294; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
13295; GFX10-NEXT:    s_mov_b32 s33, s32
13296; GFX10-NEXT:    s_addk_i32 s32, 0x200
13297; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
13298; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
13299; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
13300; GFX10-NEXT:    v_mov_b32_e32 v0, 11
13301; GFX10-NEXT:    v_mov_b32_e32 v1, 12
13302; GFX10-NEXT:    v_mov_b32_e32 v2, 13
13303; GFX10-NEXT:    v_mov_b32_e32 v3, 14
13304; GFX10-NEXT:    v_mov_b32_e32 v4, 15
13305; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
13306; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
13307; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:16
13308; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20
13309; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:24
13310; GFX10-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28
13311; GFX10-NEXT:    v_mov_b32_e32 v0, 0
13312; GFX10-NEXT:    v_mov_b32_e32 v1, 0
13313; GFX10-NEXT:    v_mov_b32_e32 v2, 0
13314; GFX10-NEXT:    v_mov_b32_e32 v3, 0
13315; GFX10-NEXT:    v_mov_b32_e32 v4, 0
13316; GFX10-NEXT:    v_mov_b32_e32 v5, 1
13317; GFX10-NEXT:    v_mov_b32_e32 v6, 1
13318; GFX10-NEXT:    v_mov_b32_e32 v7, 1
13319; GFX10-NEXT:    v_mov_b32_e32 v8, 1
13320; GFX10-NEXT:    v_mov_b32_e32 v9, 1
13321; GFX10-NEXT:    v_mov_b32_e32 v10, 2
13322; GFX10-NEXT:    v_mov_b32_e32 v11, 2
13323; GFX10-NEXT:    v_mov_b32_e32 v12, 2
13324; GFX10-NEXT:    v_mov_b32_e32 v13, 2
13325; GFX10-NEXT:    v_mov_b32_e32 v14, 2
13326; GFX10-NEXT:    v_mov_b32_e32 v15, 3
13327; GFX10-NEXT:    v_mov_b32_e32 v16, 3
13328; GFX10-NEXT:    v_mov_b32_e32 v17, 3
13329; GFX10-NEXT:    v_mov_b32_e32 v18, 3
13330; GFX10-NEXT:    v_mov_b32_e32 v19, 3
13331; GFX10-NEXT:    v_mov_b32_e32 v20, 4
13332; GFX10-NEXT:    v_mov_b32_e32 v21, 4
13333; GFX10-NEXT:    v_mov_b32_e32 v22, 4
13334; GFX10-NEXT:    v_mov_b32_e32 v23, 4
13335; GFX10-NEXT:    v_mov_b32_e32 v24, 4
13336; GFX10-NEXT:    v_mov_b32_e32 v25, 5
13337; GFX10-NEXT:    v_mov_b32_e32 v26, 5
13338; GFX10-NEXT:    v_mov_b32_e32 v27, 5
13339; GFX10-NEXT:    v_mov_b32_e32 v28, 5
13340; GFX10-NEXT:    v_mov_b32_e32 v29, 5
13341; GFX10-NEXT:    v_mov_b32_e32 v30, 6
13342; GFX10-NEXT:    v_mov_b32_e32 v31, 7
13343; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
13344; GFX10-NEXT:    s_getpc_b64 s[34:35]
13345; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4
13346; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12
13347; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13348; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
13349; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
13350; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
13351; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
13352; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13353; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13354; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13355; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13356; GFX10-NEXT:    s_waitcnt vmcnt(0)
13357; GFX10-NEXT:    s_setpc_b64 s[30:31]
13358;
13359; GFX11-LABEL: stack_8xv5i32:
13360; GFX11:       ; %bb.0: ; %entry
13361; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13362; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
13363; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13364; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
13365; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13366; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
13367; GFX11-NEXT:    v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13
13368; GFX11-NEXT:    v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15
13369; GFX11-NEXT:    v_dual_mov_b32 v4, 8 :: v_dual_mov_b32 v5, 9
13370; GFX11-NEXT:    v_dual_mov_b32 v6, 10 :: v_dual_mov_b32 v7, 11
13371; GFX11-NEXT:    s_mov_b32 s33, s32
13372; GFX11-NEXT:    s_add_i32 s32, s32, 16
13373; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
13374; GFX11-NEXT:    s_clause 0x1
13375; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32 offset:16
13376; GFX11-NEXT:    scratch_store_b128 off, v[4:7], s32
13377; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
13378; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
13379; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1
13380; GFX11-NEXT:    v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v7, 1
13381; GFX11-NEXT:    v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v9, 1
13382; GFX11-NEXT:    v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v11, 2
13383; GFX11-NEXT:    v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v13, 2
13384; GFX11-NEXT:    v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v15, 3
13385; GFX11-NEXT:    v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v17, 3
13386; GFX11-NEXT:    v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v19, 3
13387; GFX11-NEXT:    v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v21, 4
13388; GFX11-NEXT:    v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v23, 4
13389; GFX11-NEXT:    v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v25, 5
13390; GFX11-NEXT:    v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v27, 5
13391; GFX11-NEXT:    v_dual_mov_b32 v28, 5 :: v_dual_mov_b32 v29, 5
13392; GFX11-NEXT:    v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7
13393; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
13394; GFX11-NEXT:    s_getpc_b64 s[0:1]
13395; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
13396; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
13397; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
13398; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13399; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
13400; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
13401; GFX11-NEXT:    s_add_i32 s32, s32, -16
13402; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
13403; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13404; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
13405; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13406; GFX11-NEXT:    s_waitcnt vmcnt(0)
13407; GFX11-NEXT:    s_setpc_b64 s[30:31]
13408;
13409; GFX10-SCRATCH-LABEL: stack_8xv5i32:
13410; GFX10-SCRATCH:       ; %bb.0: ; %entry
13411; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13412; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
13413; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13414; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
13415; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13416; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13417; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
13418; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 12
13419; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 13
13420; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 14
13421; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 15
13422; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 8
13423; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 9
13424; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 10
13425; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 11
13426; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
13427; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
13428; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
13429; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16
13430; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[4:7], s32
13431; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
13432; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
13433; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
13434; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 0
13435; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 0
13436; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 1
13437; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 1
13438; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 1
13439; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v8, 1
13440; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v9, 1
13441; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v10, 2
13442; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v11, 2
13443; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v12, 2
13444; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v13, 2
13445; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v14, 2
13446; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v15, 3
13447; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v16, 3
13448; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v17, 3
13449; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v18, 3
13450; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v19, 3
13451; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v20, 4
13452; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v21, 4
13453; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v22, 4
13454; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v23, 4
13455; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v24, 4
13456; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v25, 5
13457; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v26, 5
13458; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v27, 5
13459; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v28, 5
13460; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v29, 5
13461; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v30, 6
13462; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v31, 7
13463; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
13464; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
13465; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
13466; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
13467; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13468; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
13469; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
13470; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
13471; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
13472; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13473; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
13474; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13475; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13476; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
13477; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The first six vectors are splats of 0 through 5; the last two
; carry the distinct values 6..15, so each trailing element (including every
; one written to memory in the expected output above) is individually
; recognizable.
13478entry:
13479  call amdgpu_gfx void @external_void_func_8xv5i32(
13480      <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
13481      <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
13482      <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
13483      <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
13484      <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
13485      <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
13486      <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
13487      <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
13488  ret void
13489}
13490
13491define amdgpu_gfx void @stack_8xv5f32() #0 {
13492; GFX9-LABEL: stack_8xv5f32:
13493; GFX9:       ; %bb.0: ; %entry
13494; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13495; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
13496; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13497; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
13498; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
13499; GFX9-NEXT:    s_mov_b32 s33, s32
13500; GFX9-NEXT:    s_addk_i32 s32, 0x400
13501; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41000000
13502; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
13503; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41100000
13504; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
13505; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41200000
13506; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
13507; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41300000
13508; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
13509; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41400000
13510; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
13511; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41500000
13512; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
13513; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41600000
13514; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
13515; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41700000
13516; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
13517; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
13518; GFX9-NEXT:    v_mov_b32_e32 v0, 0
13519; GFX9-NEXT:    v_mov_b32_e32 v1, 0
13520; GFX9-NEXT:    v_mov_b32_e32 v2, 0
13521; GFX9-NEXT:    v_mov_b32_e32 v3, 0
13522; GFX9-NEXT:    v_mov_b32_e32 v4, 0
13523; GFX9-NEXT:    v_mov_b32_e32 v5, 1.0
13524; GFX9-NEXT:    v_mov_b32_e32 v6, 1.0
13525; GFX9-NEXT:    v_mov_b32_e32 v7, 1.0
13526; GFX9-NEXT:    v_mov_b32_e32 v8, 1.0
13527; GFX9-NEXT:    v_mov_b32_e32 v9, 1.0
13528; GFX9-NEXT:    v_mov_b32_e32 v10, 2.0
13529; GFX9-NEXT:    v_mov_b32_e32 v11, 2.0
13530; GFX9-NEXT:    v_mov_b32_e32 v12, 2.0
13531; GFX9-NEXT:    v_mov_b32_e32 v13, 2.0
13532; GFX9-NEXT:    v_mov_b32_e32 v14, 2.0
13533; GFX9-NEXT:    v_mov_b32_e32 v15, 0x40400000
13534; GFX9-NEXT:    v_mov_b32_e32 v16, 0x40400000
13535; GFX9-NEXT:    v_mov_b32_e32 v17, 0x40400000
13536; GFX9-NEXT:    v_mov_b32_e32 v18, 0x40400000
13537; GFX9-NEXT:    v_mov_b32_e32 v19, 0x40400000
13538; GFX9-NEXT:    v_mov_b32_e32 v20, 4.0
13539; GFX9-NEXT:    v_mov_b32_e32 v21, 4.0
13540; GFX9-NEXT:    v_mov_b32_e32 v22, 4.0
13541; GFX9-NEXT:    v_mov_b32_e32 v23, 4.0
13542; GFX9-NEXT:    v_mov_b32_e32 v24, 4.0
13543; GFX9-NEXT:    v_mov_b32_e32 v25, 0x40a00000
13544; GFX9-NEXT:    v_mov_b32_e32 v26, 0x40a00000
13545; GFX9-NEXT:    v_mov_b32_e32 v27, 0x40a00000
13546; GFX9-NEXT:    v_mov_b32_e32 v28, 0x40a00000
13547; GFX9-NEXT:    v_mov_b32_e32 v29, 0x40a00000
13548; GFX9-NEXT:    v_mov_b32_e32 v30, 0x40c00000
13549; GFX9-NEXT:    v_mov_b32_e32 v31, 0x40e00000
13550; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
13551; GFX9-NEXT:    s_getpc_b64 s[34:35]
13552; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4
13553; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12
13554; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13555; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
13556; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
13557; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
13558; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
13559; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
13560; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13561; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
13562; GFX9-NEXT:    s_waitcnt vmcnt(0)
13563; GFX9-NEXT:    s_setpc_b64 s[30:31]
13564;
13565; GFX10-LABEL: stack_8xv5f32:
13566; GFX10:       ; %bb.0: ; %entry
13567; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13568; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
13569; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13570; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13571; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13572; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13573; GFX10-NEXT:    v_mov_b32_e32 v0, 0x41000000
13574; GFX10-NEXT:    v_mov_b32_e32 v1, 0x41100000
13575; GFX10-NEXT:    v_mov_b32_e32 v2, 0x41200000
13576; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
13577; GFX10-NEXT:    s_mov_b32 s33, s32
13578; GFX10-NEXT:    s_addk_i32 s32, 0x200
13579; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
13580; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
13581; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
13582; GFX10-NEXT:    v_mov_b32_e32 v0, 0x41300000
13583; GFX10-NEXT:    v_mov_b32_e32 v1, 0x41400000
13584; GFX10-NEXT:    v_mov_b32_e32 v2, 0x41500000
13585; GFX10-NEXT:    v_mov_b32_e32 v3, 0x41600000
13586; GFX10-NEXT:    v_mov_b32_e32 v4, 0x41700000
13587; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
13588; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
13589; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:16
13590; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20
13591; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:24
13592; GFX10-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28
13593; GFX10-NEXT:    v_mov_b32_e32 v0, 0
13594; GFX10-NEXT:    v_mov_b32_e32 v1, 0
13595; GFX10-NEXT:    v_mov_b32_e32 v2, 0
13596; GFX10-NEXT:    v_mov_b32_e32 v3, 0
13597; GFX10-NEXT:    v_mov_b32_e32 v4, 0
13598; GFX10-NEXT:    v_mov_b32_e32 v5, 1.0
13599; GFX10-NEXT:    v_mov_b32_e32 v6, 1.0
13600; GFX10-NEXT:    v_mov_b32_e32 v7, 1.0
13601; GFX10-NEXT:    v_mov_b32_e32 v8, 1.0
13602; GFX10-NEXT:    v_mov_b32_e32 v9, 1.0
13603; GFX10-NEXT:    v_mov_b32_e32 v10, 2.0
13604; GFX10-NEXT:    v_mov_b32_e32 v11, 2.0
13605; GFX10-NEXT:    v_mov_b32_e32 v12, 2.0
13606; GFX10-NEXT:    v_mov_b32_e32 v13, 2.0
13607; GFX10-NEXT:    v_mov_b32_e32 v14, 2.0
13608; GFX10-NEXT:    v_mov_b32_e32 v15, 0x40400000
13609; GFX10-NEXT:    v_mov_b32_e32 v16, 0x40400000
13610; GFX10-NEXT:    v_mov_b32_e32 v17, 0x40400000
13611; GFX10-NEXT:    v_mov_b32_e32 v18, 0x40400000
13612; GFX10-NEXT:    v_mov_b32_e32 v19, 0x40400000
13613; GFX10-NEXT:    v_mov_b32_e32 v20, 4.0
13614; GFX10-NEXT:    v_mov_b32_e32 v21, 4.0
13615; GFX10-NEXT:    v_mov_b32_e32 v22, 4.0
13616; GFX10-NEXT:    v_mov_b32_e32 v23, 4.0
13617; GFX10-NEXT:    v_mov_b32_e32 v24, 4.0
13618; GFX10-NEXT:    v_mov_b32_e32 v25, 0x40a00000
13619; GFX10-NEXT:    v_mov_b32_e32 v26, 0x40a00000
13620; GFX10-NEXT:    v_mov_b32_e32 v27, 0x40a00000
13621; GFX10-NEXT:    v_mov_b32_e32 v28, 0x40a00000
13622; GFX10-NEXT:    v_mov_b32_e32 v29, 0x40a00000
13623; GFX10-NEXT:    v_mov_b32_e32 v30, 0x40c00000
13624; GFX10-NEXT:    v_mov_b32_e32 v31, 0x40e00000
13625; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
13626; GFX10-NEXT:    s_getpc_b64 s[34:35]
13627; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4
13628; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12
13629; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
13630; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
13631; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
13632; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
13633; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
13634; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
13635; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
13636; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
13637; GFX10-NEXT:    s_mov_b32 exec_lo, s34
13638; GFX10-NEXT:    s_waitcnt vmcnt(0)
13639; GFX10-NEXT:    s_setpc_b64 s[30:31]
13640;
13641; GFX11-LABEL: stack_8xv5f32:
13642; GFX11:       ; %bb.0: ; %entry
13643; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13644; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
13645; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13646; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
13647; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13648; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
13649; GFX11-NEXT:    v_mov_b32_e32 v0, 0x41400000
13650; GFX11-NEXT:    v_mov_b32_e32 v1, 0x41500000
13651; GFX11-NEXT:    v_mov_b32_e32 v2, 0x41600000
13652; GFX11-NEXT:    v_mov_b32_e32 v3, 0x41700000
13653; GFX11-NEXT:    v_mov_b32_e32 v4, 0x41000000
13654; GFX11-NEXT:    v_mov_b32_e32 v5, 0x41100000
13655; GFX11-NEXT:    v_mov_b32_e32 v6, 0x41200000
13656; GFX11-NEXT:    v_mov_b32_e32 v7, 0x41300000
13657; GFX11-NEXT:    s_mov_b32 s33, s32
13658; GFX11-NEXT:    s_add_i32 s32, s32, 16
13659; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
13660; GFX11-NEXT:    s_clause 0x1
13661; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32 offset:16
13662; GFX11-NEXT:    scratch_store_b128 off, v[4:7], s32
13663; GFX11-NEXT:    v_mov_b32_e32 v6, 1.0
13664; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
13665; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
13666; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
13667; GFX11-NEXT:    v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
13668; GFX11-NEXT:    v_dual_mov_b32 v9, 1.0 :: v_dual_mov_b32 v10, 2.0
13669; GFX11-NEXT:    v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v12, 2.0
13670; GFX11-NEXT:    v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v14, 2.0
13671; GFX11-NEXT:    v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
13672; GFX11-NEXT:    v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
13673; GFX11-NEXT:    v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v20, 4.0
13674; GFX11-NEXT:    v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v22, 4.0
13675; GFX11-NEXT:    v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v24, 4.0
13676; GFX11-NEXT:    v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
13677; GFX11-NEXT:    v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
13678; GFX11-NEXT:    v_mov_b32_e32 v29, 0x40a00000
13679; GFX11-NEXT:    v_mov_b32_e32 v30, 0x40c00000
13680; GFX11-NEXT:    v_mov_b32_e32 v31, 0x40e00000
13681; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
13682; GFX11-NEXT:    s_getpc_b64 s[0:1]
13683; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
13684; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
13685; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
13686; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13687; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
13688; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
13689; GFX11-NEXT:    s_add_i32 s32, s32, -16
13690; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
13691; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
13692; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
13693; GFX11-NEXT:    s_mov_b32 exec_lo, s0
13694; GFX11-NEXT:    s_waitcnt vmcnt(0)
13695; GFX11-NEXT:    s_setpc_b64 s[30:31]
13696;
13697; GFX10-SCRATCH-LABEL: stack_8xv5f32:
13698; GFX10-SCRATCH:       ; %bb.0: ; %entry
13699; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13700; GFX10-SCRATCH-NEXT:    s_waitcnt_vscnt null, 0x0
13701; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13702; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
13703; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13704; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13705; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s33, 2
13706; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0x41400000
13707; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0x41500000
13708; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0x41600000
13709; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 0x41700000
13710; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 0x41000000
13711; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 0x41100000
13712; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 0x41200000
13713; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 0x41300000
13714; GFX10-SCRATCH-NEXT:    s_mov_b32 s33, s32
13715; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, 16
13716; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s30, 0
13717; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32 offset:16
13718; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[4:7], s32
13719; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, 0
13720; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, 0
13721; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, 0
13722; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, 0
13723; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, 0
13724; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, 1.0
13725; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, 1.0
13726; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v7, 1.0
13727; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v8, 1.0
13728; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v9, 1.0
13729; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v10, 2.0
13730; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v11, 2.0
13731; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v12, 2.0
13732; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v13, 2.0
13733; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v14, 2.0
13734; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v15, 0x40400000
13735; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v16, 0x40400000
13736; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v17, 0x40400000
13737; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v18, 0x40400000
13738; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v19, 0x40400000
13739; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v20, 4.0
13740; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v21, 4.0
13741; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v22, 4.0
13742; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v23, 4.0
13743; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v24, 4.0
13744; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v25, 0x40a00000
13745; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v26, 0x40a00000
13746; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v27, 0x40a00000
13747; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v28, 0x40a00000
13748; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v29, 0x40a00000
13749; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v30, 0x40c00000
13750; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v31, 0x40e00000
13751; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s31, 1
13752; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]
13753; GFX10-SCRATCH-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
13754; GFX10-SCRATCH-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
13755; GFX10-SCRATCH-NEXT:    s_swappc_b64 s[30:31], s[0:1]
13756; GFX10-SCRATCH-NEXT:    v_readlane_b32 s31, v40, 1
13757; GFX10-SCRATCH-NEXT:    v_readlane_b32 s30, v40, 0
13758; GFX10-SCRATCH-NEXT:    s_add_i32 s32, s32, -16
13759; GFX10-SCRATCH-NEXT:    v_readlane_b32 s33, v40, 2
13760; GFX10-SCRATCH-NEXT:    s_or_saveexec_b32 s0, -1
13761; GFX10-SCRATCH-NEXT:    scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
13762; GFX10-SCRATCH-NEXT:    s_waitcnt_depctr 0xffe3
13763; GFX10-SCRATCH-NEXT:    s_mov_b32 exec_lo, s0
13764; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
13765; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
13766entry:
13767  call amdgpu_gfx void @external_void_func_8xv5f32(
13768      <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
13769      <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
13770      <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
13771      <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
13772      <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
13773      <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
13774      <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
13775      <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
13776  ret void
13777}
13778
; External callees whose first parameter is a <32 x i32> vector followed by a
; single f64-related argument: a byval(double) pointer in addrspace(5) with
; align 16 for the first, and a plain double for the second.
; NOTE(review): presumably the leading <32 x i32> is there to use up the
; register-passed argument slots so that the trailing argument is passed on the
; stack (the second callee is named "stack_passed"); the callers are outside
; this part of the file -- confirm there.
13779declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0
13780declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0
; External callees that take many small vectors: twelve <3 x i32>, eight
; <5 x i32> and twelve <3 x float> (36, 40 and 36 scalar lanes respectively).
; NOTE(review): the callers are not in this part of the file; presumably each
; one is paired with a stack_* test in the same way as the eight <5 x float>
; callee declared below -- confirm.
13781declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
13782    <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
13783declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
13784    <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
13785declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
13786    <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
; External callee taking eight <5 x float> arguments (40 float lanes in total).
; It is the target of the call in stack_8xv5f32 above. In the gfx1100 and
; gfx1010 flat-scratch check bodies for that caller, the first 32 lanes are
; materialized in v0-v31 and the remaining 8 lanes (8.0 through 15.0) are
; written with two 16-byte scratch stores at s32 and s32 offset:16 before the
; call.
13787declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
13788    <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
; Attribute groups referenced as #N elsewhere in this file.
; #0 (nounwind) is the group attached to the external callee declarations.
; NOTE(review): no use of #1 (nounwind noinline) is visible near here;
; presumably it is applied to test functions defined earlier in the file --
; confirm before changing or removing it.
13789attributes #0 = { nounwind }
13790attributes #1 = { nounwind noinline }
13791