1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
4
; Externally defined, hidden globals in addrspace(4) (the AMDGPU constant
; address space) that each hold one function pointer. The kernel tests below
; load their callee from here: @gv.fptr0 holds a void() pointer and @gv.fptr1
; holds a void(i32) pointer.
5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
7
; Kernel that loads a void() function pointer from @gv.fptr0 and calls it
; indirectly with no arguments. The i8 kernel parameter is unnamed and unused.
; Both llc runs (default instruction selection and -global-isel) are expected
; to load the pointer into s[18:19] with s_load_dwordx2 and to call through it
; with s_swappc_b64. The assertion lines are autogenerated (see the NOTE at
; the top of the file); regenerate them instead of editing them by hand.
8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
9; GCN-LABEL: test_indirect_call_sgpr_ptr:
10; GCN:         .amd_kernel_code_t
11; GCN-NEXT:     amd_code_version_major = 1
12; GCN-NEXT:     amd_code_version_minor = 2
13; GCN-NEXT:     amd_machine_kind = 1
14; GCN-NEXT:     amd_machine_version_major = 7
15; GCN-NEXT:     amd_machine_version_minor = 0
16; GCN-NEXT:     amd_machine_version_stepping = 0
17; GCN-NEXT:     kernel_code_entry_byte_offset = 256
18; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
19; GCN-NEXT:     granulated_workitem_vgpr_count = 10
20; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
21; GCN-NEXT:     priority = 0
22; GCN-NEXT:     float_mode = 240
23; GCN-NEXT:     priv = 0
24; GCN-NEXT:     enable_dx10_clamp = 1
25; GCN-NEXT:     debug_mode = 0
26; GCN-NEXT:     enable_ieee_mode = 1
27; GCN-NEXT:     enable_wgp_mode = 0
28; GCN-NEXT:     enable_mem_ordered = 0
29; GCN-NEXT:     enable_fwd_progress = 0
30; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
31; GCN-NEXT:     user_sgpr_count = 14
32; GCN-NEXT:     enable_trap_handler = 0
33; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
34; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
35; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
36; GCN-NEXT:     enable_sgpr_workgroup_info = 0
37; GCN-NEXT:     enable_vgpr_workitem_id = 2
38; GCN-NEXT:     enable_exception_msb = 0
39; GCN-NEXT:     granulated_lds_size = 0
40; GCN-NEXT:     enable_exception = 0
41; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
42; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
43; GCN-NEXT:     enable_sgpr_queue_ptr = 1
44; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
45; GCN-NEXT:     enable_sgpr_dispatch_id = 1
46; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
47; GCN-NEXT:     enable_sgpr_private_segment_size = 0
48; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
49; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
50; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
51; GCN-NEXT:     enable_wavefront_size32 = 0
52; GCN-NEXT:     enable_ordered_append_gds = 0
53; GCN-NEXT:     private_element_size = 1
54; GCN-NEXT:     is_ptr64 = 1
55; GCN-NEXT:     is_dynamic_callstack = 1
56; GCN-NEXT:     is_debug_enabled = 0
57; GCN-NEXT:     is_xnack_enabled = 0
58; GCN-NEXT:     workitem_private_segment_byte_size = 16384
59; GCN-NEXT:     workgroup_group_segment_byte_size = 0
60; GCN-NEXT:     gds_segment_byte_size = 0
61; GCN-NEXT:     kernarg_segment_byte_size = 64
62; GCN-NEXT:     workgroup_fbarrier_count = 0
63; GCN-NEXT:     wavefront_sgpr_count = 68
64; GCN-NEXT:     workitem_vgpr_count = 42
65; GCN-NEXT:     reserved_vgpr_first = 0
66; GCN-NEXT:     reserved_vgpr_count = 0
67; GCN-NEXT:     reserved_sgpr_first = 0
68; GCN-NEXT:     reserved_sgpr_count = 0
69; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
70; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
71; GCN-NEXT:     kernarg_segment_alignment = 4
72; GCN-NEXT:     group_segment_alignment = 4
73; GCN-NEXT:     private_segment_alignment = 4
74; GCN-NEXT:     wavefront_size = 6
75; GCN-NEXT:     call_convention = -1
76; GCN-NEXT:     runtime_loader_kernel_symbol = 0
77; GCN-NEXT:    .end_amd_kernel_code_t
78; GCN-NEXT:  ; %bb.0:
79; GCN-NEXT:    s_mov_b32 s32, 0
80; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
81; GCN-NEXT:    s_add_i32 s12, s12, s17
82; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
83; GCN-NEXT:    s_add_u32 s0, s0, s17
84; GCN-NEXT:    s_addc_u32 s1, s1, 0
85; GCN-NEXT:    s_mov_b32 s13, s15
86; GCN-NEXT:    s_mov_b32 s12, s14
87; GCN-NEXT:    s_getpc_b64 s[14:15]
88; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
89; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
90; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
91; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
92; GCN-NEXT:    s_add_u32 s8, s8, 8
93; GCN-NEXT:    s_addc_u32 s9, s9, 0
94; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
95; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
96; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
97; GCN-NEXT:    s_mov_b32 s14, s16
98; GCN-NEXT:    s_waitcnt lgkmcnt(0)
99; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
100; GCN-NEXT:    s_endpgm
101;
102; GISEL-LABEL: test_indirect_call_sgpr_ptr:
103; GISEL:         .amd_kernel_code_t
104; GISEL-NEXT:     amd_code_version_major = 1
105; GISEL-NEXT:     amd_code_version_minor = 2
106; GISEL-NEXT:     amd_machine_kind = 1
107; GISEL-NEXT:     amd_machine_version_major = 7
108; GISEL-NEXT:     amd_machine_version_minor = 0
109; GISEL-NEXT:     amd_machine_version_stepping = 0
110; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
111; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
112; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
113; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
114; GISEL-NEXT:     priority = 0
115; GISEL-NEXT:     float_mode = 240
116; GISEL-NEXT:     priv = 0
117; GISEL-NEXT:     enable_dx10_clamp = 1
118; GISEL-NEXT:     debug_mode = 0
119; GISEL-NEXT:     enable_ieee_mode = 1
120; GISEL-NEXT:     enable_wgp_mode = 0
121; GISEL-NEXT:     enable_mem_ordered = 0
122; GISEL-NEXT:     enable_fwd_progress = 0
123; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
124; GISEL-NEXT:     user_sgpr_count = 14
125; GISEL-NEXT:     enable_trap_handler = 0
126; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
127; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
128; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
129; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
130; GISEL-NEXT:     enable_vgpr_workitem_id = 2
131; GISEL-NEXT:     enable_exception_msb = 0
132; GISEL-NEXT:     granulated_lds_size = 0
133; GISEL-NEXT:     enable_exception = 0
134; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
135; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
136; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
137; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
138; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
139; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
140; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
141; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
142; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
143; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
144; GISEL-NEXT:     enable_wavefront_size32 = 0
145; GISEL-NEXT:     enable_ordered_append_gds = 0
146; GISEL-NEXT:     private_element_size = 1
147; GISEL-NEXT:     is_ptr64 = 1
148; GISEL-NEXT:     is_dynamic_callstack = 1
149; GISEL-NEXT:     is_debug_enabled = 0
150; GISEL-NEXT:     is_xnack_enabled = 0
151; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
152; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
153; GISEL-NEXT:     gds_segment_byte_size = 0
154; GISEL-NEXT:     kernarg_segment_byte_size = 64
155; GISEL-NEXT:     workgroup_fbarrier_count = 0
156; GISEL-NEXT:     wavefront_sgpr_count = 68
157; GISEL-NEXT:     workitem_vgpr_count = 42
158; GISEL-NEXT:     reserved_vgpr_first = 0
159; GISEL-NEXT:     reserved_vgpr_count = 0
160; GISEL-NEXT:     reserved_sgpr_first = 0
161; GISEL-NEXT:     reserved_sgpr_count = 0
162; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
163; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
164; GISEL-NEXT:     kernarg_segment_alignment = 4
165; GISEL-NEXT:     group_segment_alignment = 4
166; GISEL-NEXT:     private_segment_alignment = 4
167; GISEL-NEXT:     wavefront_size = 6
168; GISEL-NEXT:     call_convention = -1
169; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
170; GISEL-NEXT:    .end_amd_kernel_code_t
171; GISEL-NEXT:  ; %bb.0:
172; GISEL-NEXT:    s_mov_b32 s32, 0
173; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
174; GISEL-NEXT:    s_add_i32 s12, s12, s17
175; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
176; GISEL-NEXT:    s_add_u32 s0, s0, s17
177; GISEL-NEXT:    s_addc_u32 s1, s1, 0
178; GISEL-NEXT:    s_mov_b32 s13, s15
179; GISEL-NEXT:    s_mov_b32 s12, s14
180; GISEL-NEXT:    s_getpc_b64 s[14:15]
181; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
182; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
183; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
184; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
185; GISEL-NEXT:    s_add_u32 s8, s8, 8
186; GISEL-NEXT:    s_addc_u32 s9, s9, 0
187; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
188; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
189; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
190; GISEL-NEXT:    s_mov_b32 s14, s16
191; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
192; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
193; GISEL-NEXT:    s_endpgm
  ; Fetch the callee address from the external global and call through it;
  ; the global is only declared here, so the target is unknown at compile time.
194  %fptr = load void()*, void()* addrspace(4)* @gv.fptr0
195  call void %fptr()
196  ret void
197}
198
; Kernel that loads a void(i32) function pointer from @gv.fptr1 and calls it
; indirectly with the constant argument 123. The i8 kernel parameter is unnamed
; and unused. Both llc runs are expected to load the pointer into s[18:19],
; place the argument in v0 (v_mov_b32_e32 v0, 0x7b) and call through
; s_swappc_b64. The assertion lines are autogenerated (see the NOTE at the top
; of the file); regenerate them instead of editing them by hand.
199define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
200; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
201; GCN:         .amd_kernel_code_t
202; GCN-NEXT:     amd_code_version_major = 1
203; GCN-NEXT:     amd_code_version_minor = 2
204; GCN-NEXT:     amd_machine_kind = 1
205; GCN-NEXT:     amd_machine_version_major = 7
206; GCN-NEXT:     amd_machine_version_minor = 0
207; GCN-NEXT:     amd_machine_version_stepping = 0
208; GCN-NEXT:     kernel_code_entry_byte_offset = 256
209; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
210; GCN-NEXT:     granulated_workitem_vgpr_count = 10
211; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
212; GCN-NEXT:     priority = 0
213; GCN-NEXT:     float_mode = 240
214; GCN-NEXT:     priv = 0
215; GCN-NEXT:     enable_dx10_clamp = 1
216; GCN-NEXT:     debug_mode = 0
217; GCN-NEXT:     enable_ieee_mode = 1
218; GCN-NEXT:     enable_wgp_mode = 0
219; GCN-NEXT:     enable_mem_ordered = 0
220; GCN-NEXT:     enable_fwd_progress = 0
221; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
222; GCN-NEXT:     user_sgpr_count = 14
223; GCN-NEXT:     enable_trap_handler = 0
224; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
225; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
226; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
227; GCN-NEXT:     enable_sgpr_workgroup_info = 0
228; GCN-NEXT:     enable_vgpr_workitem_id = 2
229; GCN-NEXT:     enable_exception_msb = 0
230; GCN-NEXT:     granulated_lds_size = 0
231; GCN-NEXT:     enable_exception = 0
232; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
233; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
234; GCN-NEXT:     enable_sgpr_queue_ptr = 1
235; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
236; GCN-NEXT:     enable_sgpr_dispatch_id = 1
237; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
238; GCN-NEXT:     enable_sgpr_private_segment_size = 0
239; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
240; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
241; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
242; GCN-NEXT:     enable_wavefront_size32 = 0
243; GCN-NEXT:     enable_ordered_append_gds = 0
244; GCN-NEXT:     private_element_size = 1
245; GCN-NEXT:     is_ptr64 = 1
246; GCN-NEXT:     is_dynamic_callstack = 1
247; GCN-NEXT:     is_debug_enabled = 0
248; GCN-NEXT:     is_xnack_enabled = 0
249; GCN-NEXT:     workitem_private_segment_byte_size = 16384
250; GCN-NEXT:     workgroup_group_segment_byte_size = 0
251; GCN-NEXT:     gds_segment_byte_size = 0
252; GCN-NEXT:     kernarg_segment_byte_size = 64
253; GCN-NEXT:     workgroup_fbarrier_count = 0
254; GCN-NEXT:     wavefront_sgpr_count = 68
255; GCN-NEXT:     workitem_vgpr_count = 42
256; GCN-NEXT:     reserved_vgpr_first = 0
257; GCN-NEXT:     reserved_vgpr_count = 0
258; GCN-NEXT:     reserved_sgpr_first = 0
259; GCN-NEXT:     reserved_sgpr_count = 0
260; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
261; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
262; GCN-NEXT:     kernarg_segment_alignment = 4
263; GCN-NEXT:     group_segment_alignment = 4
264; GCN-NEXT:     private_segment_alignment = 4
265; GCN-NEXT:     wavefront_size = 6
266; GCN-NEXT:     call_convention = -1
267; GCN-NEXT:     runtime_loader_kernel_symbol = 0
268; GCN-NEXT:    .end_amd_kernel_code_t
269; GCN-NEXT:  ; %bb.0:
270; GCN-NEXT:    s_mov_b32 s32, 0
271; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
272; GCN-NEXT:    s_add_i32 s12, s12, s17
273; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
274; GCN-NEXT:    s_add_u32 s0, s0, s17
275; GCN-NEXT:    s_addc_u32 s1, s1, 0
276; GCN-NEXT:    s_mov_b32 s13, s15
277; GCN-NEXT:    s_mov_b32 s12, s14
278; GCN-NEXT:    s_getpc_b64 s[14:15]
279; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
280; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
281; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
282; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
283; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
284; GCN-NEXT:    s_add_u32 s8, s8, 8
285; GCN-NEXT:    s_addc_u32 s9, s9, 0
286; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
287; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
288; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
289; GCN-NEXT:    s_mov_b32 s14, s16
290; GCN-NEXT:    s_waitcnt lgkmcnt(0)
291; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
292; GCN-NEXT:    s_endpgm
293;
294; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
295; GISEL:         .amd_kernel_code_t
296; GISEL-NEXT:     amd_code_version_major = 1
297; GISEL-NEXT:     amd_code_version_minor = 2
298; GISEL-NEXT:     amd_machine_kind = 1
299; GISEL-NEXT:     amd_machine_version_major = 7
300; GISEL-NEXT:     amd_machine_version_minor = 0
301; GISEL-NEXT:     amd_machine_version_stepping = 0
302; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
303; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
304; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
305; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
306; GISEL-NEXT:     priority = 0
307; GISEL-NEXT:     float_mode = 240
308; GISEL-NEXT:     priv = 0
309; GISEL-NEXT:     enable_dx10_clamp = 1
310; GISEL-NEXT:     debug_mode = 0
311; GISEL-NEXT:     enable_ieee_mode = 1
312; GISEL-NEXT:     enable_wgp_mode = 0
313; GISEL-NEXT:     enable_mem_ordered = 0
314; GISEL-NEXT:     enable_fwd_progress = 0
315; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
316; GISEL-NEXT:     user_sgpr_count = 14
317; GISEL-NEXT:     enable_trap_handler = 0
318; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
319; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
320; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
321; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
322; GISEL-NEXT:     enable_vgpr_workitem_id = 2
323; GISEL-NEXT:     enable_exception_msb = 0
324; GISEL-NEXT:     granulated_lds_size = 0
325; GISEL-NEXT:     enable_exception = 0
326; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
327; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
328; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
329; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
330; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
331; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
332; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
333; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
334; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
335; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
336; GISEL-NEXT:     enable_wavefront_size32 = 0
337; GISEL-NEXT:     enable_ordered_append_gds = 0
338; GISEL-NEXT:     private_element_size = 1
339; GISEL-NEXT:     is_ptr64 = 1
340; GISEL-NEXT:     is_dynamic_callstack = 1
341; GISEL-NEXT:     is_debug_enabled = 0
342; GISEL-NEXT:     is_xnack_enabled = 0
343; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
344; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
345; GISEL-NEXT:     gds_segment_byte_size = 0
346; GISEL-NEXT:     kernarg_segment_byte_size = 64
347; GISEL-NEXT:     workgroup_fbarrier_count = 0
348; GISEL-NEXT:     wavefront_sgpr_count = 68
349; GISEL-NEXT:     workitem_vgpr_count = 42
350; GISEL-NEXT:     reserved_vgpr_first = 0
351; GISEL-NEXT:     reserved_vgpr_count = 0
352; GISEL-NEXT:     reserved_sgpr_first = 0
353; GISEL-NEXT:     reserved_sgpr_count = 0
354; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
355; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
356; GISEL-NEXT:     kernarg_segment_alignment = 4
357; GISEL-NEXT:     group_segment_alignment = 4
358; GISEL-NEXT:     private_segment_alignment = 4
359; GISEL-NEXT:     wavefront_size = 6
360; GISEL-NEXT:     call_convention = -1
361; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
362; GISEL-NEXT:    .end_amd_kernel_code_t
363; GISEL-NEXT:  ; %bb.0:
364; GISEL-NEXT:    s_mov_b32 s32, 0
365; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
366; GISEL-NEXT:    s_add_i32 s12, s12, s17
367; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
368; GISEL-NEXT:    s_add_u32 s0, s0, s17
369; GISEL-NEXT:    s_addc_u32 s1, s1, 0
370; GISEL-NEXT:    s_mov_b32 s13, s15
371; GISEL-NEXT:    s_mov_b32 s12, s14
372; GISEL-NEXT:    s_getpc_b64 s[14:15]
373; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
374; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
375; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
376; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
377; GISEL-NEXT:    s_add_u32 s8, s8, 8
378; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
379; GISEL-NEXT:    s_addc_u32 s9, s9, 0
380; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
381; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
382; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
383; GISEL-NEXT:    s_mov_b32 s14, s16
384; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
385; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
386; GISEL-NEXT:    s_endpgm
  ; Fetch the callee address from the external global and call through it with
  ; one i32 argument; 123 is the 0x7b moved into v0 in the checks above.
387  %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
388  call void %fptr(i32 123)
389  ret void
390}
391
; Non-kernel function that calls its pointer argument %fptr with no arguments.
; In the expected output the pointer is read from v[0:1] inside a loop
; (.LBB2_1). Each iteration does the following:
;   - v_readfirstlane_b32 copies one lane's pointer into s[16:17];
;   - v_cmp_eq_u64 + s_and_saveexec_b64 limit exec to the lanes holding that
;     same pointer;
;   - s_swappc_b64 calls it;
;   - s_xor_b64 removes those lanes from exec, and s_cbranch_execnz repeats
;     while any lane is left.
; Before the loop s4-s15 are copied to s34-s45 and exec to s[46:47]; the
; s4-s15 values are copied back ahead of every call, and exec is restored
; after the loop. v40 holds the saved SGPRs via v_writelane_b32 and is itself
; spilled and reloaded around the function body.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing them by hand.
392define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
393; GCN-LABEL: test_indirect_call_vgpr_ptr:
394; GCN:       ; %bb.0:
395; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
397; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
398; GCN-NEXT:    s_mov_b64 exec, s[16:17]
399; GCN-NEXT:    v_writelane_b32 v40, s33, 18
400; GCN-NEXT:    s_mov_b32 s33, s32
401; GCN-NEXT:    s_addk_i32 s32, 0x400
402; GCN-NEXT:    v_writelane_b32 v40, s30, 0
403; GCN-NEXT:    v_writelane_b32 v40, s31, 1
404; GCN-NEXT:    v_writelane_b32 v40, s34, 2
405; GCN-NEXT:    v_writelane_b32 v40, s35, 3
406; GCN-NEXT:    v_writelane_b32 v40, s36, 4
407; GCN-NEXT:    v_writelane_b32 v40, s37, 5
408; GCN-NEXT:    v_writelane_b32 v40, s38, 6
409; GCN-NEXT:    v_writelane_b32 v40, s39, 7
410; GCN-NEXT:    v_writelane_b32 v40, s40, 8
411; GCN-NEXT:    v_writelane_b32 v40, s41, 9
412; GCN-NEXT:    v_writelane_b32 v40, s42, 10
413; GCN-NEXT:    v_writelane_b32 v40, s43, 11
414; GCN-NEXT:    v_writelane_b32 v40, s44, 12
415; GCN-NEXT:    v_writelane_b32 v40, s45, 13
416; GCN-NEXT:    v_writelane_b32 v40, s46, 14
417; GCN-NEXT:    v_writelane_b32 v40, s47, 15
418; GCN-NEXT:    v_writelane_b32 v40, s48, 16
419; GCN-NEXT:    v_writelane_b32 v40, s49, 17
420; GCN-NEXT:    s_mov_b32 s42, s15
421; GCN-NEXT:    s_mov_b32 s43, s14
422; GCN-NEXT:    s_mov_b32 s44, s13
423; GCN-NEXT:    s_mov_b32 s45, s12
424; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
425; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
426; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
427; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
428; GCN-NEXT:    s_mov_b64 s[46:47], exec
429; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
430; GCN-NEXT:    v_readfirstlane_b32 s16, v0
431; GCN-NEXT:    v_readfirstlane_b32 s17, v1
432; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
433; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
434; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
435; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
436; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
437; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
438; GCN-NEXT:    s_mov_b32 s12, s45
439; GCN-NEXT:    s_mov_b32 s13, s44
440; GCN-NEXT:    s_mov_b32 s14, s43
441; GCN-NEXT:    s_mov_b32 s15, s42
442; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
443; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
444; GCN-NEXT:    ; implicit-def: $vgpr31
445; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
446; GCN-NEXT:    s_cbranch_execnz .LBB2_1
447; GCN-NEXT:  ; %bb.2:
448; GCN-NEXT:    s_mov_b64 exec, s[46:47]
449; GCN-NEXT:    v_readlane_b32 s49, v40, 17
450; GCN-NEXT:    v_readlane_b32 s48, v40, 16
451; GCN-NEXT:    v_readlane_b32 s47, v40, 15
452; GCN-NEXT:    v_readlane_b32 s46, v40, 14
453; GCN-NEXT:    v_readlane_b32 s45, v40, 13
454; GCN-NEXT:    v_readlane_b32 s44, v40, 12
455; GCN-NEXT:    v_readlane_b32 s43, v40, 11
456; GCN-NEXT:    v_readlane_b32 s42, v40, 10
457; GCN-NEXT:    v_readlane_b32 s41, v40, 9
458; GCN-NEXT:    v_readlane_b32 s40, v40, 8
459; GCN-NEXT:    v_readlane_b32 s39, v40, 7
460; GCN-NEXT:    v_readlane_b32 s38, v40, 6
461; GCN-NEXT:    v_readlane_b32 s37, v40, 5
462; GCN-NEXT:    v_readlane_b32 s36, v40, 4
463; GCN-NEXT:    v_readlane_b32 s35, v40, 3
464; GCN-NEXT:    v_readlane_b32 s34, v40, 2
465; GCN-NEXT:    v_readlane_b32 s31, v40, 1
466; GCN-NEXT:    v_readlane_b32 s30, v40, 0
467; GCN-NEXT:    s_addk_i32 s32, 0xfc00
468; GCN-NEXT:    v_readlane_b32 s33, v40, 18
469; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
470; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
471; GCN-NEXT:    s_mov_b64 exec, s[4:5]
472; GCN-NEXT:    s_waitcnt vmcnt(0)
473; GCN-NEXT:    s_setpc_b64 s[30:31]
474;
475; GISEL-LABEL: test_indirect_call_vgpr_ptr:
476; GISEL:       ; %bb.0:
477; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
479; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
480; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
481; GISEL-NEXT:    v_writelane_b32 v40, s33, 18
482; GISEL-NEXT:    s_mov_b32 s33, s32
483; GISEL-NEXT:    s_addk_i32 s32, 0x400
484; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
485; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
486; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
487; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
488; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
489; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
490; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
491; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
492; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
493; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
494; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
495; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
496; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
497; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
498; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
499; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
500; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
501; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
502; GISEL-NEXT:    s_mov_b32 s42, s15
503; GISEL-NEXT:    s_mov_b32 s43, s14
504; GISEL-NEXT:    s_mov_b32 s44, s13
505; GISEL-NEXT:    s_mov_b32 s45, s12
506; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
507; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
508; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
509; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
510; GISEL-NEXT:    s_mov_b64 s[46:47], exec
511; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
512; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
513; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
514; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
515; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
516; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
517; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
518; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
519; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
520; GISEL-NEXT:    s_mov_b32 s12, s45
521; GISEL-NEXT:    s_mov_b32 s13, s44
522; GISEL-NEXT:    s_mov_b32 s14, s43
523; GISEL-NEXT:    s_mov_b32 s15, s42
524; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
525; GISEL-NEXT:    ; implicit-def: $vgpr0
526; GISEL-NEXT:    ; implicit-def: $vgpr31
527; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
528; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
529; GISEL-NEXT:  ; %bb.2:
530; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
531; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
532; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
533; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
534; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
535; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
536; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
537; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
538; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
539; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
540; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
541; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
542; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
543; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
544; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
545; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
546; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
547; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
548; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
549; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
550; GISEL-NEXT:    v_readlane_b32 s33, v40, 18
551; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
552; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
553; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
554; GISEL-NEXT:    s_waitcnt vmcnt(0)
555; GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Call straight through the incoming pointer argument; nothing is passed and
  ; nothing is returned.
556  call void %fptr()
557  ret void
558}
559
; Non-kernel function that calls its pointer argument %fptr with the constant
; i32 argument 123. The expected output has the same per-lane loop shape as
; the no-argument variant (.LBB3_1):
;   - v_readfirstlane_b32 picks a pointer;
;   - s_and_saveexec_b64 limits exec to the lanes that share it;
;   - s_swappc_b64 calls it;
;   - s_xor_b64 / s_cbranch_execnz repeat while lanes remain.
; The two llc runs differ only in how 0x7b reaches v0. The default run puts it
; in v2 ahead of the loop and copies v2 to v0 before each call. The
; -global-isel run writes 0x7b to v0 inside the loop.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing them by hand.
560define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
561; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
562; GCN:       ; %bb.0:
563; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
565; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
566; GCN-NEXT:    s_mov_b64 exec, s[16:17]
567; GCN-NEXT:    v_writelane_b32 v40, s33, 18
568; GCN-NEXT:    s_mov_b32 s33, s32
569; GCN-NEXT:    s_addk_i32 s32, 0x400
570; GCN-NEXT:    v_writelane_b32 v40, s30, 0
571; GCN-NEXT:    v_writelane_b32 v40, s31, 1
572; GCN-NEXT:    v_writelane_b32 v40, s34, 2
573; GCN-NEXT:    v_writelane_b32 v40, s35, 3
574; GCN-NEXT:    v_writelane_b32 v40, s36, 4
575; GCN-NEXT:    v_writelane_b32 v40, s37, 5
576; GCN-NEXT:    v_writelane_b32 v40, s38, 6
577; GCN-NEXT:    v_writelane_b32 v40, s39, 7
578; GCN-NEXT:    v_writelane_b32 v40, s40, 8
579; GCN-NEXT:    v_writelane_b32 v40, s41, 9
580; GCN-NEXT:    v_writelane_b32 v40, s42, 10
581; GCN-NEXT:    v_writelane_b32 v40, s43, 11
582; GCN-NEXT:    v_writelane_b32 v40, s44, 12
583; GCN-NEXT:    v_writelane_b32 v40, s45, 13
584; GCN-NEXT:    v_writelane_b32 v40, s46, 14
585; GCN-NEXT:    v_writelane_b32 v40, s47, 15
586; GCN-NEXT:    v_writelane_b32 v40, s48, 16
587; GCN-NEXT:    v_writelane_b32 v40, s49, 17
588; GCN-NEXT:    s_mov_b32 s42, s15
589; GCN-NEXT:    s_mov_b32 s43, s14
590; GCN-NEXT:    s_mov_b32 s44, s13
591; GCN-NEXT:    s_mov_b32 s45, s12
592; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
593; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
594; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
595; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
596; GCN-NEXT:    s_mov_b64 s[46:47], exec
597; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
598; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
599; GCN-NEXT:    v_readfirstlane_b32 s16, v0
600; GCN-NEXT:    v_readfirstlane_b32 s17, v1
601; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
602; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
603; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
604; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
605; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
606; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
607; GCN-NEXT:    s_mov_b32 s12, s45
608; GCN-NEXT:    s_mov_b32 s13, s44
609; GCN-NEXT:    s_mov_b32 s14, s43
610; GCN-NEXT:    s_mov_b32 s15, s42
611; GCN-NEXT:    v_mov_b32_e32 v0, v2
612; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
613; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
614; GCN-NEXT:    ; implicit-def: $vgpr31
615; GCN-NEXT:    ; implicit-def: $vgpr2
616; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
617; GCN-NEXT:    s_cbranch_execnz .LBB3_1
618; GCN-NEXT:  ; %bb.2:
619; GCN-NEXT:    s_mov_b64 exec, s[46:47]
620; GCN-NEXT:    v_readlane_b32 s49, v40, 17
621; GCN-NEXT:    v_readlane_b32 s48, v40, 16
622; GCN-NEXT:    v_readlane_b32 s47, v40, 15
623; GCN-NEXT:    v_readlane_b32 s46, v40, 14
624; GCN-NEXT:    v_readlane_b32 s45, v40, 13
625; GCN-NEXT:    v_readlane_b32 s44, v40, 12
626; GCN-NEXT:    v_readlane_b32 s43, v40, 11
627; GCN-NEXT:    v_readlane_b32 s42, v40, 10
628; GCN-NEXT:    v_readlane_b32 s41, v40, 9
629; GCN-NEXT:    v_readlane_b32 s40, v40, 8
630; GCN-NEXT:    v_readlane_b32 s39, v40, 7
631; GCN-NEXT:    v_readlane_b32 s38, v40, 6
632; GCN-NEXT:    v_readlane_b32 s37, v40, 5
633; GCN-NEXT:    v_readlane_b32 s36, v40, 4
634; GCN-NEXT:    v_readlane_b32 s35, v40, 3
635; GCN-NEXT:    v_readlane_b32 s34, v40, 2
636; GCN-NEXT:    v_readlane_b32 s31, v40, 1
637; GCN-NEXT:    v_readlane_b32 s30, v40, 0
638; GCN-NEXT:    s_addk_i32 s32, 0xfc00
639; GCN-NEXT:    v_readlane_b32 s33, v40, 18
640; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
641; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
642; GCN-NEXT:    s_mov_b64 exec, s[4:5]
643; GCN-NEXT:    s_waitcnt vmcnt(0)
644; GCN-NEXT:    s_setpc_b64 s[30:31]
645;
646; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
647; GISEL:       ; %bb.0:
648; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
650; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
651; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
652; GISEL-NEXT:    v_writelane_b32 v40, s33, 18
653; GISEL-NEXT:    s_mov_b32 s33, s32
654; GISEL-NEXT:    s_addk_i32 s32, 0x400
655; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
656; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
657; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
658; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
659; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
660; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
661; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
662; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
663; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
664; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
665; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
666; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
667; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
668; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
669; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
670; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
671; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
672; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
673; GISEL-NEXT:    s_mov_b32 s42, s15
674; GISEL-NEXT:    s_mov_b32 s43, s14
675; GISEL-NEXT:    s_mov_b32 s44, s13
676; GISEL-NEXT:    s_mov_b32 s45, s12
677; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
678; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
679; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
680; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
681; GISEL-NEXT:    s_mov_b64 s[46:47], exec
682; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
683; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
684; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
685; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
686; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
687; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
688; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
689; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
690; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
691; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
692; GISEL-NEXT:    s_mov_b32 s12, s45
693; GISEL-NEXT:    s_mov_b32 s13, s44
694; GISEL-NEXT:    s_mov_b32 s14, s43
695; GISEL-NEXT:    s_mov_b32 s15, s42
696; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
697; GISEL-NEXT:    ; implicit-def: $vgpr0
698; GISEL-NEXT:    ; implicit-def: $vgpr31
699; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
700; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
701; GISEL-NEXT:  ; %bb.2:
702; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
703; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
704; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
705; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
706; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
707; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
708; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
709; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
710; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
711; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
712; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
713; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
714; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
715; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
716; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
717; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
718; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
719; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
720; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
721; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
722; GISEL-NEXT:    v_readlane_b32 s33, v40, 18
723; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
724; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
725; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
726; GISEL-NEXT:    s_waitcnt vmcnt(0)
727; GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Call through the incoming pointer argument with one constant i32 operand;
  ; 123 is the 0x7b seen in the checks above.
728  call void %fptr(i32 123)
729  ret void
730}
731
; Indirect call through a function pointer received as an ordinary argument,
; where the callee's i32 result is used afterwards (the function returns
; %a + 1).
;
; GCN lines check the default llc run, GISEL lines check the -global-isel run
; (see the RUN lines at the top of the file). Both expect a loop (.LBB4_1)
; that reads the pointer out of v[0:1] with v_readfirstlane_b32, limits exec
; to the lanes whose pointer compares equal to that value, calls through
; s_swappc_b64, and repeats while s_cbranch_execnz is taken. Inside the loop
; the call result in v0 is copied to another VGPR (v2 for GCN, v1 for GISEL),
; which is what the final v_add_i32 consumes.
;
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
732define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
733; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
734; GCN:       ; %bb.0:
735; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
737; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
738; GCN-NEXT:    s_mov_b64 exec, s[16:17]
739; GCN-NEXT:    v_writelane_b32 v40, s33, 18
740; GCN-NEXT:    s_mov_b32 s33, s32
741; GCN-NEXT:    s_addk_i32 s32, 0x400
742; GCN-NEXT:    v_writelane_b32 v40, s30, 0
743; GCN-NEXT:    v_writelane_b32 v40, s31, 1
744; GCN-NEXT:    v_writelane_b32 v40, s34, 2
745; GCN-NEXT:    v_writelane_b32 v40, s35, 3
746; GCN-NEXT:    v_writelane_b32 v40, s36, 4
747; GCN-NEXT:    v_writelane_b32 v40, s37, 5
748; GCN-NEXT:    v_writelane_b32 v40, s38, 6
749; GCN-NEXT:    v_writelane_b32 v40, s39, 7
750; GCN-NEXT:    v_writelane_b32 v40, s40, 8
751; GCN-NEXT:    v_writelane_b32 v40, s41, 9
752; GCN-NEXT:    v_writelane_b32 v40, s42, 10
753; GCN-NEXT:    v_writelane_b32 v40, s43, 11
754; GCN-NEXT:    v_writelane_b32 v40, s44, 12
755; GCN-NEXT:    v_writelane_b32 v40, s45, 13
756; GCN-NEXT:    v_writelane_b32 v40, s46, 14
757; GCN-NEXT:    v_writelane_b32 v40, s47, 15
758; GCN-NEXT:    v_writelane_b32 v40, s48, 16
759; GCN-NEXT:    v_writelane_b32 v40, s49, 17
760; GCN-NEXT:    s_mov_b32 s42, s15
761; GCN-NEXT:    s_mov_b32 s43, s14
762; GCN-NEXT:    s_mov_b32 s44, s13
763; GCN-NEXT:    s_mov_b32 s45, s12
764; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
765; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
766; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
767; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
768; GCN-NEXT:    s_mov_b64 s[46:47], exec
769; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
770; GCN-NEXT:    v_readfirstlane_b32 s16, v0
771; GCN-NEXT:    v_readfirstlane_b32 s17, v1
772; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
773; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
774; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
775; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
776; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
777; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
778; GCN-NEXT:    s_mov_b32 s12, s45
779; GCN-NEXT:    s_mov_b32 s13, s44
780; GCN-NEXT:    s_mov_b32 s14, s43
781; GCN-NEXT:    s_mov_b32 s15, s42
782; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
783; GCN-NEXT:    v_mov_b32_e32 v2, v0
784; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
785; GCN-NEXT:    ; implicit-def: $vgpr31
786; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
787; GCN-NEXT:    s_cbranch_execnz .LBB4_1
788; GCN-NEXT:  ; %bb.2:
789; GCN-NEXT:    s_mov_b64 exec, s[46:47]
790; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
791; GCN-NEXT:    v_readlane_b32 s49, v40, 17
792; GCN-NEXT:    v_readlane_b32 s48, v40, 16
793; GCN-NEXT:    v_readlane_b32 s47, v40, 15
794; GCN-NEXT:    v_readlane_b32 s46, v40, 14
795; GCN-NEXT:    v_readlane_b32 s45, v40, 13
796; GCN-NEXT:    v_readlane_b32 s44, v40, 12
797; GCN-NEXT:    v_readlane_b32 s43, v40, 11
798; GCN-NEXT:    v_readlane_b32 s42, v40, 10
799; GCN-NEXT:    v_readlane_b32 s41, v40, 9
800; GCN-NEXT:    v_readlane_b32 s40, v40, 8
801; GCN-NEXT:    v_readlane_b32 s39, v40, 7
802; GCN-NEXT:    v_readlane_b32 s38, v40, 6
803; GCN-NEXT:    v_readlane_b32 s37, v40, 5
804; GCN-NEXT:    v_readlane_b32 s36, v40, 4
805; GCN-NEXT:    v_readlane_b32 s35, v40, 3
806; GCN-NEXT:    v_readlane_b32 s34, v40, 2
807; GCN-NEXT:    v_readlane_b32 s31, v40, 1
808; GCN-NEXT:    v_readlane_b32 s30, v40, 0
809; GCN-NEXT:    s_addk_i32 s32, 0xfc00
810; GCN-NEXT:    v_readlane_b32 s33, v40, 18
811; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
812; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
813; GCN-NEXT:    s_mov_b64 exec, s[4:5]
814; GCN-NEXT:    s_waitcnt vmcnt(0)
815; GCN-NEXT:    s_setpc_b64 s[30:31]
816;
817; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
818; GISEL:       ; %bb.0:
819; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
821; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
822; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
823; GISEL-NEXT:    v_writelane_b32 v40, s33, 18
824; GISEL-NEXT:    s_mov_b32 s33, s32
825; GISEL-NEXT:    s_addk_i32 s32, 0x400
826; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
827; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
828; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
829; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
830; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
831; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
832; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
833; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
834; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
835; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
836; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
837; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
838; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
839; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
840; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
841; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
842; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
843; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
844; GISEL-NEXT:    s_mov_b32 s42, s15
845; GISEL-NEXT:    s_mov_b32 s43, s14
846; GISEL-NEXT:    s_mov_b32 s44, s13
847; GISEL-NEXT:    s_mov_b32 s45, s12
848; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
849; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
850; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
851; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
852; GISEL-NEXT:    s_mov_b64 s[46:47], exec
853; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
854; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
855; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
856; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
857; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
858; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
859; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
860; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
861; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
862; GISEL-NEXT:    s_mov_b32 s12, s45
863; GISEL-NEXT:    s_mov_b32 s13, s44
864; GISEL-NEXT:    s_mov_b32 s14, s43
865; GISEL-NEXT:    s_mov_b32 s15, s42
866; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
867; GISEL-NEXT:    v_mov_b32_e32 v1, v0
868; GISEL-NEXT:    ; implicit-def: $vgpr0
869; GISEL-NEXT:    ; implicit-def: $vgpr31
870; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
871; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
872; GISEL-NEXT:  ; %bb.2:
873; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
874; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
875; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
876; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
877; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
878; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
879; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
880; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
881; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
882; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
883; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
884; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
885; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
886; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
887; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
888; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
889; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
890; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
891; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
892; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
893; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
894; GISEL-NEXT:    v_readlane_b32 s33, v40, 18
895; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
896; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
897; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
898; GISEL-NEXT:    s_waitcnt vmcnt(0)
899; GISEL-NEXT:    s_setpc_b64 s[30:31]
900  %a = call i32 %fptr() ; the indirect call under test
901  %b = add i32 %a, 1 ; use the result so it has to be carried out of the call loop
902  ret i32 %b
903}
904
; Indirect call through a function-pointer argument, placed in a block (%bb1)
; that is only reached when %cond is true; %bb0 branches straight to %bb2
; otherwise.
;
; GCN lines check the default llc run, GISEL lines check the -global-isel run
; (see the RUN lines at the top of the file). Both expect the condition to be
; computed from bit 0 of v2 (v_and_b32 with 1, then v_cmp_eq_u32 against 1 for
; GCN or v_cmp_ne_u32 against 0 for GISEL), followed by s_and_saveexec_b64 into
; s[46:47] and an s_cbranch_execz to .LBB5_4 that skips the call. The call
; itself sits in its own loop (.LBB5_2) that saves exec in s[48:49], uses
; s[50:51] for the per-iteration mask, and is left once s_cbranch_execnz falls
; through. .LBB5_4 (%bb2) rejoins with s_or_b64 exec, exec, s[46:47].
;
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
905define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
906; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
907; GCN:       ; %bb.0: ; %bb0
908; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
910; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
911; GCN-NEXT:    s_mov_b64 exec, s[16:17]
912; GCN-NEXT:    v_writelane_b32 v40, s33, 20
913; GCN-NEXT:    s_mov_b32 s33, s32
914; GCN-NEXT:    s_addk_i32 s32, 0x400
915; GCN-NEXT:    v_writelane_b32 v40, s30, 0
916; GCN-NEXT:    v_writelane_b32 v40, s31, 1
917; GCN-NEXT:    v_writelane_b32 v40, s34, 2
918; GCN-NEXT:    v_writelane_b32 v40, s35, 3
919; GCN-NEXT:    v_writelane_b32 v40, s36, 4
920; GCN-NEXT:    v_writelane_b32 v40, s37, 5
921; GCN-NEXT:    v_writelane_b32 v40, s38, 6
922; GCN-NEXT:    v_writelane_b32 v40, s39, 7
923; GCN-NEXT:    v_writelane_b32 v40, s40, 8
924; GCN-NEXT:    v_writelane_b32 v40, s41, 9
925; GCN-NEXT:    v_writelane_b32 v40, s42, 10
926; GCN-NEXT:    v_writelane_b32 v40, s43, 11
927; GCN-NEXT:    v_writelane_b32 v40, s44, 12
928; GCN-NEXT:    v_writelane_b32 v40, s45, 13
929; GCN-NEXT:    v_writelane_b32 v40, s46, 14
930; GCN-NEXT:    v_writelane_b32 v40, s47, 15
931; GCN-NEXT:    v_writelane_b32 v40, s48, 16
932; GCN-NEXT:    v_writelane_b32 v40, s49, 17
933; GCN-NEXT:    v_writelane_b32 v40, s50, 18
934; GCN-NEXT:    v_writelane_b32 v40, s51, 19
935; GCN-NEXT:    s_mov_b32 s42, s15
936; GCN-NEXT:    s_mov_b32 s43, s14
937; GCN-NEXT:    s_mov_b32 s44, s13
938; GCN-NEXT:    s_mov_b32 s45, s12
939; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
940; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
941; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
942; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
943; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
944; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
945; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
946; GCN-NEXT:    s_cbranch_execz .LBB5_4
947; GCN-NEXT:  ; %bb.1: ; %bb1
948; GCN-NEXT:    s_mov_b64 s[48:49], exec
949; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
950; GCN-NEXT:    v_readfirstlane_b32 s16, v0
951; GCN-NEXT:    v_readfirstlane_b32 s17, v1
952; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
953; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
954; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
955; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
956; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
957; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
958; GCN-NEXT:    s_mov_b32 s12, s45
959; GCN-NEXT:    s_mov_b32 s13, s44
960; GCN-NEXT:    s_mov_b32 s14, s43
961; GCN-NEXT:    s_mov_b32 s15, s42
962; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
963; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
964; GCN-NEXT:    ; implicit-def: $vgpr31
965; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
966; GCN-NEXT:    s_cbranch_execnz .LBB5_2
967; GCN-NEXT:  ; %bb.3:
968; GCN-NEXT:    s_mov_b64 exec, s[48:49]
969; GCN-NEXT:  .LBB5_4: ; %bb2
970; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
971; GCN-NEXT:    v_readlane_b32 s51, v40, 19
972; GCN-NEXT:    v_readlane_b32 s50, v40, 18
973; GCN-NEXT:    v_readlane_b32 s49, v40, 17
974; GCN-NEXT:    v_readlane_b32 s48, v40, 16
975; GCN-NEXT:    v_readlane_b32 s47, v40, 15
976; GCN-NEXT:    v_readlane_b32 s46, v40, 14
977; GCN-NEXT:    v_readlane_b32 s45, v40, 13
978; GCN-NEXT:    v_readlane_b32 s44, v40, 12
979; GCN-NEXT:    v_readlane_b32 s43, v40, 11
980; GCN-NEXT:    v_readlane_b32 s42, v40, 10
981; GCN-NEXT:    v_readlane_b32 s41, v40, 9
982; GCN-NEXT:    v_readlane_b32 s40, v40, 8
983; GCN-NEXT:    v_readlane_b32 s39, v40, 7
984; GCN-NEXT:    v_readlane_b32 s38, v40, 6
985; GCN-NEXT:    v_readlane_b32 s37, v40, 5
986; GCN-NEXT:    v_readlane_b32 s36, v40, 4
987; GCN-NEXT:    v_readlane_b32 s35, v40, 3
988; GCN-NEXT:    v_readlane_b32 s34, v40, 2
989; GCN-NEXT:    v_readlane_b32 s31, v40, 1
990; GCN-NEXT:    v_readlane_b32 s30, v40, 0
991; GCN-NEXT:    s_addk_i32 s32, 0xfc00
992; GCN-NEXT:    v_readlane_b32 s33, v40, 20
993; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
994; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
995; GCN-NEXT:    s_mov_b64 exec, s[4:5]
996; GCN-NEXT:    s_waitcnt vmcnt(0)
997; GCN-NEXT:    s_setpc_b64 s[30:31]
998;
999; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
1000; GISEL:       ; %bb.0: ; %bb0
1001; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
1003; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1004; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
1005; GISEL-NEXT:    v_writelane_b32 v40, s33, 20
1006; GISEL-NEXT:    s_mov_b32 s33, s32
1007; GISEL-NEXT:    s_addk_i32 s32, 0x400
1008; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1009; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1010; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1011; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1012; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1013; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1014; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1015; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1016; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1017; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1018; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1019; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1020; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1021; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1022; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1023; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1024; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1025; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1026; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1027; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1028; GISEL-NEXT:    s_mov_b32 s42, s15
1029; GISEL-NEXT:    s_mov_b32 s43, s14
1030; GISEL-NEXT:    s_mov_b32 s44, s13
1031; GISEL-NEXT:    s_mov_b32 s45, s12
1032; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
1033; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
1034; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
1035; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
1036; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
1037; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1038; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
1039; GISEL-NEXT:    s_cbranch_execz .LBB5_4
1040; GISEL-NEXT:  ; %bb.1: ; %bb1
1041; GISEL-NEXT:    s_mov_b64 s[48:49], exec
1042; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
1043; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
1044; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
1045; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
1046; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
1047; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
1048; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
1049; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
1050; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
1051; GISEL-NEXT:    s_mov_b32 s12, s45
1052; GISEL-NEXT:    s_mov_b32 s13, s44
1053; GISEL-NEXT:    s_mov_b32 s14, s43
1054; GISEL-NEXT:    s_mov_b32 s15, s42
1055; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1056; GISEL-NEXT:    ; implicit-def: $vgpr0
1057; GISEL-NEXT:    ; implicit-def: $vgpr31
1058; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
1059; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
1060; GISEL-NEXT:  ; %bb.3:
1061; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
1062; GISEL-NEXT:  .LBB5_4: ; %bb2
1063; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
1064; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1065; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1066; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1067; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1068; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1069; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1070; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1071; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1072; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1073; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1074; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1075; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1076; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1077; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1078; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1079; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1080; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1081; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1082; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1083; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1084; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1085; GISEL-NEXT:    v_readlane_b32 s33, v40, 20
1086; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1087; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1088; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1089; GISEL-NEXT:    s_waitcnt vmcnt(0)
1090; GISEL-NEXT:    s_setpc_b64 s[30:31]
1091bb0:
1092  br i1 %cond, label %bb1, label %bb2
1093
1094bb1:
1095  call void %fptr() ; the indirect call under test, reached only when %cond is true
1096  br label %bb2
1097
1098bb2:
1099  ret void
1100}
1101
; Indirect call through a function-pointer argument where the call site uses
; the amdgpu_gfx calling convention and passes the constant 123 as an `inreg`
; i32 argument.
;
; GCN lines check the default llc run, GISEL lines check the -global-isel run
; (see the RUN lines at the top of the file). Both expect the constant to be
; set up once, ahead of the loop, in an SGPR (s_movk_i32 s4, 0x7b; 0x7b == 123)
; and the call loop (.LBB6_1) to read the pointer from v[0:1] into s[8:9],
; mask exec via s[10:11], and call through s_swappc_b64. Unlike the
; default-convention call tests in this file, no s[4:15] copies are expected
; around the call, and the prologue/epilogue save and restore s30-s31 and
; s34-s63 through lanes 0-31 of v40 (s33 goes to lane 32).
;
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1102define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
1103; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1104; GCN:       ; %bb.0:
1105; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1107; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1108; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1109; GCN-NEXT:    v_writelane_b32 v40, s33, 32
1110; GCN-NEXT:    s_mov_b32 s33, s32
1111; GCN-NEXT:    s_addk_i32 s32, 0x400
1112; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1113; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1114; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1115; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1116; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1117; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1118; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1119; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1120; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1121; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1122; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1123; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1124; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1125; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1126; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1127; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1128; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1129; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1130; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1131; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1132; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1133; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1134; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1135; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1136; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1137; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1138; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1139; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1140; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1141; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1142; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1143; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1144; GCN-NEXT:    s_mov_b64 s[6:7], exec
1145; GCN-NEXT:    s_movk_i32 s4, 0x7b
1146; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
1147; GCN-NEXT:    v_readfirstlane_b32 s8, v0
1148; GCN-NEXT:    v_readfirstlane_b32 s9, v1
1149; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1150; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1151; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1152; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1153; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
1154; GCN-NEXT:    s_cbranch_execnz .LBB6_1
1155; GCN-NEXT:  ; %bb.2:
1156; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1157; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1158; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1159; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1160; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1161; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1162; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1163; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1164; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1165; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1166; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1167; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1168; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1169; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1170; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1171; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1172; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1173; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1174; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1175; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1176; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1177; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1178; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1179; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1180; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1181; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1182; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1183; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1184; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1185; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1186; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1187; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1188; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1189; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1190; GCN-NEXT:    v_readlane_b32 s33, v40, 32
1191; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1192; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1193; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1194; GCN-NEXT:    s_waitcnt vmcnt(0)
1195; GCN-NEXT:    s_setpc_b64 s[30:31]
1196;
1197; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1198; GISEL:       ; %bb.0:
1199; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1201; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1202; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1203; GISEL-NEXT:    v_writelane_b32 v40, s33, 32
1204; GISEL-NEXT:    s_mov_b32 s33, s32
1205; GISEL-NEXT:    s_addk_i32 s32, 0x400
1206; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1207; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1208; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1209; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1210; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1211; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1212; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1213; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1214; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1215; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1216; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1217; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1218; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1219; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1220; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1221; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1222; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1223; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1224; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1225; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1226; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1227; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1228; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1229; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1230; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1231; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1232; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1233; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1234; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1235; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1236; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1237; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1238; GISEL-NEXT:    s_mov_b64 s[6:7], exec
1239; GISEL-NEXT:    s_movk_i32 s4, 0x7b
1240; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
1241; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
1242; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
1243; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1244; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1245; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1246; GISEL-NEXT:    ; implicit-def: $vgpr0
1247; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
1248; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
1249; GISEL-NEXT:  ; %bb.2:
1250; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1251; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1252; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1253; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1254; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1255; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1256; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1257; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1258; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1259; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1260; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1261; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1262; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1263; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1264; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1265; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1266; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1267; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1268; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1269; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1270; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1271; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1272; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1273; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1274; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1275; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1276; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1277; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1278; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1279; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1280; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1281; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1282; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1283; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1284; GISEL-NEXT:    v_readlane_b32 s33, v40, 32
1285; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1286; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1287; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1288; GISEL-NEXT:    s_waitcnt vmcnt(0)
1289; GISEL-NEXT:    s_setpc_b64 s[30:31]
1290  call amdgpu_gfx void %fptr(i32 inreg 123) ; the indirect call under test; 123 shows up as 0x7b in the checks
1291  ret void
1292}
1293
1294define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) {
1295; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1296; GCN:       ; %bb.0:
1297; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1299; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1300; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1301; GCN-NEXT:    v_writelane_b32 v40, s33, 32
1302; GCN-NEXT:    s_mov_b32 s33, s32
1303; GCN-NEXT:    s_addk_i32 s32, 0x400
1304; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1305; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1306; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1307; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1308; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1309; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1310; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1311; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1312; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1313; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1314; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1315; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1316; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1317; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1318; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1319; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1320; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1321; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1322; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1323; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1324; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1325; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1326; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1327; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1328; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1329; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1330; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1331; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1332; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1333; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1334; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1335; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1336; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1337; GCN-NEXT:    v_mov_b32_e32 v41, v0
1338; GCN-NEXT:    s_mov_b64 s[4:5], exec
1339; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1340; GCN-NEXT:    v_readfirstlane_b32 s6, v1
1341; GCN-NEXT:    v_readfirstlane_b32 s7, v2
1342; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1343; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1344; GCN-NEXT:    v_mov_b32_e32 v0, v41
1345; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1346; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1347; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1348; GCN-NEXT:    s_cbranch_execnz .LBB7_1
1349; GCN-NEXT:  ; %bb.2:
1350; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1351; GCN-NEXT:    v_mov_b32_e32 v0, v41
1352; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1353; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1354; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1355; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1356; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1357; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1358; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1359; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1360; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1361; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1362; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1363; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1364; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1365; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1366; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1367; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1368; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1369; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1370; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1371; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1372; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1373; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1374; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1375; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1376; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1377; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1378; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1379; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1380; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1381; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1382; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1383; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1384; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1385; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1386; GCN-NEXT:    v_readlane_b32 s33, v40, 32
1387; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1388; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1389; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1390; GCN-NEXT:    s_waitcnt vmcnt(0)
1391; GCN-NEXT:    s_setpc_b64 s[30:31]
1392;
1393; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1394; GISEL:       ; %bb.0:
1395; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1397; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1398; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1399; GISEL-NEXT:    v_writelane_b32 v40, s33, 32
1400; GISEL-NEXT:    s_mov_b32 s33, s32
1401; GISEL-NEXT:    s_addk_i32 s32, 0x400
1402; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1403; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1404; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1405; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1406; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1407; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1408; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1409; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1410; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1411; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1412; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1413; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1414; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1415; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1416; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1417; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1418; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1419; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1420; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1421; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1422; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1423; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1424; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1425; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1426; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1427; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1428; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1429; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1430; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1431; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1432; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1433; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1434; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1435; GISEL-NEXT:    v_mov_b32_e32 v41, v0
1436; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1437; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1438; GISEL-NEXT:    v_readfirstlane_b32 s6, v1
1439; GISEL-NEXT:    v_readfirstlane_b32 s7, v2
1440; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1441; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1442; GISEL-NEXT:    v_mov_b32_e32 v0, v41
1443; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1444; GISEL-NEXT:    ; implicit-def: $vgpr1
1445; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
1446; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
1447; GISEL-NEXT:  ; %bb.2:
1448; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1449; GISEL-NEXT:    v_mov_b32_e32 v0, v41
1450; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1451; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1452; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1453; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1454; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1455; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1456; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1457; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1458; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1459; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1460; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1461; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1462; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1463; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1464; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1465; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1466; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1467; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1468; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1469; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1470; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1471; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1472; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1473; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1474; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1475; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1476; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1477; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1478; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1479; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1480; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1481; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1482; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1483; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1484; GISEL-NEXT:    v_readlane_b32 s33, v40, 32
1485; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1486; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1487; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1488; GISEL-NEXT:    s_waitcnt vmcnt(0)
1489; GISEL-NEXT:    s_setpc_b64 s[30:31]
1490  call amdgpu_gfx void %fptr(i32 %i)
1491  ret i32 %i
1492}
1493
1494; Use a variable inside a waterfall loop and use the return variable after the loop.
1495; TODO: The argument and return variable could be in the same physical register, but the register
1496; allocator is not able to do that because the return value clashes with the live range of an
1497; IMPLICIT_DEF of the argument.
1498define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) {
1499; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1500; GCN:       ; %bb.0:
1501; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1503; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1504; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1505; GCN-NEXT:    v_writelane_b32 v40, s33, 32
1506; GCN-NEXT:    s_mov_b32 s33, s32
1507; GCN-NEXT:    s_addk_i32 s32, 0x400
1508; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1509; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1510; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1511; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1512; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1513; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1514; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1515; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1516; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1517; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1518; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1519; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1520; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1521; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1522; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1523; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1524; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1525; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1526; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1527; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1528; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1529; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1530; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1531; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1532; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1533; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1534; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1535; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1536; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1537; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1538; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1539; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1540; GCN-NEXT:    s_mov_b64 s[4:5], exec
1541; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1542; GCN-NEXT:    v_readfirstlane_b32 s6, v1
1543; GCN-NEXT:    v_readfirstlane_b32 s7, v2
1544; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
1545; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1546; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1547; GCN-NEXT:    v_mov_b32_e32 v3, v0
1548; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1549; GCN-NEXT:    ; implicit-def: $vgpr0
1550; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1551; GCN-NEXT:    s_cbranch_execnz .LBB8_1
1552; GCN-NEXT:  ; %bb.2:
1553; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1554; GCN-NEXT:    v_mov_b32_e32 v0, v3
1555; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1556; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1557; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1558; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1559; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1560; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1561; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1562; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1563; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1564; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1565; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1566; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1567; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1568; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1569; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1570; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1571; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1572; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1573; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1574; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1575; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1576; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1577; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1578; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1579; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1580; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1581; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1582; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1583; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1584; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1585; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1586; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1587; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1588; GCN-NEXT:    v_readlane_b32 s33, v40, 32
1589; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1590; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1591; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1592; GCN-NEXT:    s_waitcnt vmcnt(0)
1593; GCN-NEXT:    s_setpc_b64 s[30:31]
1594;
1595; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1596; GISEL:       ; %bb.0:
1597; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1599; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1600; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1601; GISEL-NEXT:    v_writelane_b32 v40, s33, 32
1602; GISEL-NEXT:    s_mov_b32 s33, s32
1603; GISEL-NEXT:    s_addk_i32 s32, 0x400
1604; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1605; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1606; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1607; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1608; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1609; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1610; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1611; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1612; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1613; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1614; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1615; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1616; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1617; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1618; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1619; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1620; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1621; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1622; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1623; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1624; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1625; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1626; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1627; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1628; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1629; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1630; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1631; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1632; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1633; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1634; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1635; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1636; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1637; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1638; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
1639; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
1640; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1641; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
1642; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1643; GISEL-NEXT:    v_mov_b32_e32 v2, v0
1644; GISEL-NEXT:    ; implicit-def: $vgpr1
1645; GISEL-NEXT:    ; implicit-def: $vgpr0
1646; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
1647; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
1648; GISEL-NEXT:  ; %bb.2:
1649; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1650; GISEL-NEXT:    v_mov_b32_e32 v0, v2
1651; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1652; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1653; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1654; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1655; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1656; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1657; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1658; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1659; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1660; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1661; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1662; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1663; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1664; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1665; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1666; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1667; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1668; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1669; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1670; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1671; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1672; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1673; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1674; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1675; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1676; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1677; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1678; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1679; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1680; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1681; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1682; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1683; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1684; GISEL-NEXT:    v_readlane_b32 s33, v40, 32
1685; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1686; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1687; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1688; GISEL-NEXT:    s_waitcnt vmcnt(0)
1689; GISEL-NEXT:    s_setpc_b64 s[30:31]
; %fptr is passed in VGPRs (v[1:2] in the expected output), so the call is
; wrapped in a loop: v_readfirstlane picks one lane's pointer, exec is narrowed
; to the lanes holding that same pointer, s_swappc_b64 performs the call, and
; the loop repeats while any lanes remain.
; The call result is copied out of v0 inside the loop and moved back into v0
; after it; this appears to be the cost the TODO above this test describes.
1690  %ret = call amdgpu_gfx i32 %fptr(i32 %i)
1691  ret i32 %ret
1692}
1693
1694; A call through a function pointer held in VGPRs can never be a tail call.
1695define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
1696; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1697; GCN:       ; %bb.0:
1698; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1699; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1700; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1701; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1702; GCN-NEXT:    v_writelane_b32 v40, s33, 32
1703; GCN-NEXT:    s_mov_b32 s33, s32
1704; GCN-NEXT:    s_addk_i32 s32, 0x400
1705; GCN-NEXT:    v_writelane_b32 v40, s30, 0
1706; GCN-NEXT:    v_writelane_b32 v40, s31, 1
1707; GCN-NEXT:    v_writelane_b32 v40, s34, 2
1708; GCN-NEXT:    v_writelane_b32 v40, s35, 3
1709; GCN-NEXT:    v_writelane_b32 v40, s36, 4
1710; GCN-NEXT:    v_writelane_b32 v40, s37, 5
1711; GCN-NEXT:    v_writelane_b32 v40, s38, 6
1712; GCN-NEXT:    v_writelane_b32 v40, s39, 7
1713; GCN-NEXT:    v_writelane_b32 v40, s40, 8
1714; GCN-NEXT:    v_writelane_b32 v40, s41, 9
1715; GCN-NEXT:    v_writelane_b32 v40, s42, 10
1716; GCN-NEXT:    v_writelane_b32 v40, s43, 11
1717; GCN-NEXT:    v_writelane_b32 v40, s44, 12
1718; GCN-NEXT:    v_writelane_b32 v40, s45, 13
1719; GCN-NEXT:    v_writelane_b32 v40, s46, 14
1720; GCN-NEXT:    v_writelane_b32 v40, s47, 15
1721; GCN-NEXT:    v_writelane_b32 v40, s48, 16
1722; GCN-NEXT:    v_writelane_b32 v40, s49, 17
1723; GCN-NEXT:    v_writelane_b32 v40, s50, 18
1724; GCN-NEXT:    v_writelane_b32 v40, s51, 19
1725; GCN-NEXT:    v_writelane_b32 v40, s52, 20
1726; GCN-NEXT:    v_writelane_b32 v40, s53, 21
1727; GCN-NEXT:    v_writelane_b32 v40, s54, 22
1728; GCN-NEXT:    v_writelane_b32 v40, s55, 23
1729; GCN-NEXT:    v_writelane_b32 v40, s56, 24
1730; GCN-NEXT:    v_writelane_b32 v40, s57, 25
1731; GCN-NEXT:    v_writelane_b32 v40, s58, 26
1732; GCN-NEXT:    v_writelane_b32 v40, s59, 27
1733; GCN-NEXT:    v_writelane_b32 v40, s60, 28
1734; GCN-NEXT:    v_writelane_b32 v40, s61, 29
1735; GCN-NEXT:    v_writelane_b32 v40, s62, 30
1736; GCN-NEXT:    v_writelane_b32 v40, s63, 31
1737; GCN-NEXT:    s_mov_b64 s[4:5], exec
1738; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1739; GCN-NEXT:    v_readfirstlane_b32 s6, v0
1740; GCN-NEXT:    v_readfirstlane_b32 s7, v1
1741; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1742; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1743; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1744; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1745; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1746; GCN-NEXT:    s_cbranch_execnz .LBB9_1
1747; GCN-NEXT:  ; %bb.2:
1748; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1749; GCN-NEXT:    v_readlane_b32 s63, v40, 31
1750; GCN-NEXT:    v_readlane_b32 s62, v40, 30
1751; GCN-NEXT:    v_readlane_b32 s61, v40, 29
1752; GCN-NEXT:    v_readlane_b32 s60, v40, 28
1753; GCN-NEXT:    v_readlane_b32 s59, v40, 27
1754; GCN-NEXT:    v_readlane_b32 s58, v40, 26
1755; GCN-NEXT:    v_readlane_b32 s57, v40, 25
1756; GCN-NEXT:    v_readlane_b32 s56, v40, 24
1757; GCN-NEXT:    v_readlane_b32 s55, v40, 23
1758; GCN-NEXT:    v_readlane_b32 s54, v40, 22
1759; GCN-NEXT:    v_readlane_b32 s53, v40, 21
1760; GCN-NEXT:    v_readlane_b32 s52, v40, 20
1761; GCN-NEXT:    v_readlane_b32 s51, v40, 19
1762; GCN-NEXT:    v_readlane_b32 s50, v40, 18
1763; GCN-NEXT:    v_readlane_b32 s49, v40, 17
1764; GCN-NEXT:    v_readlane_b32 s48, v40, 16
1765; GCN-NEXT:    v_readlane_b32 s47, v40, 15
1766; GCN-NEXT:    v_readlane_b32 s46, v40, 14
1767; GCN-NEXT:    v_readlane_b32 s45, v40, 13
1768; GCN-NEXT:    v_readlane_b32 s44, v40, 12
1769; GCN-NEXT:    v_readlane_b32 s43, v40, 11
1770; GCN-NEXT:    v_readlane_b32 s42, v40, 10
1771; GCN-NEXT:    v_readlane_b32 s41, v40, 9
1772; GCN-NEXT:    v_readlane_b32 s40, v40, 8
1773; GCN-NEXT:    v_readlane_b32 s39, v40, 7
1774; GCN-NEXT:    v_readlane_b32 s38, v40, 6
1775; GCN-NEXT:    v_readlane_b32 s37, v40, 5
1776; GCN-NEXT:    v_readlane_b32 s36, v40, 4
1777; GCN-NEXT:    v_readlane_b32 s35, v40, 3
1778; GCN-NEXT:    v_readlane_b32 s34, v40, 2
1779; GCN-NEXT:    v_readlane_b32 s31, v40, 1
1780; GCN-NEXT:    v_readlane_b32 s30, v40, 0
1781; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1782; GCN-NEXT:    v_readlane_b32 s33, v40, 32
1783; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1784; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1785; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1786; GCN-NEXT:    s_waitcnt vmcnt(0)
1787; GCN-NEXT:    s_setpc_b64 s[30:31]
1788;
1789; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1790; GISEL:       ; %bb.0:
1791; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1793; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1794; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1795; GISEL-NEXT:    v_writelane_b32 v40, s33, 32
1796; GISEL-NEXT:    s_mov_b32 s33, s32
1797; GISEL-NEXT:    s_addk_i32 s32, 0x400
1798; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
1799; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
1800; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
1801; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
1802; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
1803; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
1804; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
1805; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
1806; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
1807; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
1808; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
1809; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
1810; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
1811; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
1812; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
1813; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
1814; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
1815; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
1816; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
1817; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
1818; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
1819; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
1820; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
1821; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
1822; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
1823; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
1824; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
1825; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
1826; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
1827; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
1828; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
1829; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
1830; GISEL-NEXT:    s_mov_b64 s[4:5], exec
1831; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1832; GISEL-NEXT:    v_readfirstlane_b32 s6, v0
1833; GISEL-NEXT:    v_readfirstlane_b32 s7, v1
1834; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
1835; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1836; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
1837; GISEL-NEXT:    ; implicit-def: $vgpr0
1838; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
1839; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
1840; GISEL-NEXT:  ; %bb.2:
1841; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1842; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
1843; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
1844; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
1845; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
1846; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
1847; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
1848; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
1849; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
1850; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
1851; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
1852; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
1853; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
1854; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
1855; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
1856; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
1857; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
1858; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
1859; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
1860; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
1861; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
1862; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
1863; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
1864; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
1865; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
1866; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
1867; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
1868; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
1869; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
1870; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
1871; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
1872; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
1873; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
1874; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1875; GISEL-NEXT:    v_readlane_b32 s33, v40, 32
1876; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1877; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1878; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1879; GISEL-NEXT:    s_waitcnt vmcnt(0)
1880; GISEL-NEXT:    s_setpc_b64 s[30:31]
; Although the IR marks this call `tail`, the expected output still performs it
; with s_swappc_b64 inside the v_readfirstlane loop and returns through its own
; s_setpc_b64, i.e. it is emitted as an ordinary call rather than a tail call.
1881  tail call amdgpu_gfx void %fptr()
1882  ret void
1883}
1884