1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
4
; Function-pointer globals used as indirect-call targets. Both are declared
; external, hidden, unnamed_addr and `constant` in addrspace(4).
; @gv.fptr0 holds a `void()*`; @gv.fptr1 holds a `void(i32)*`.
5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
7
; Kernel test: load a `void()*` from @gv.fptr0 and call it with no arguments.
; The unnamed i8 kernel argument is not used by the body.
; Both check blocks (SelectionDAG and GlobalISel) expect the call target to be
; loaded into s[18:19] with s_load_dwordx2 and then passed directly to
; s_swappc_b64, and expect v0/v1/v2 to be packed into v31 (v1 shifted left by
; 10, v2 by 20) before the call.
8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
9; GCN-LABEL: test_indirect_call_sgpr_ptr:
10; GCN:         .amd_kernel_code_t
11; GCN-NEXT:     amd_code_version_major = 1
12; GCN-NEXT:     amd_code_version_minor = 2
13; GCN-NEXT:     amd_machine_kind = 1
14; GCN-NEXT:     amd_machine_version_major = 7
15; GCN-NEXT:     amd_machine_version_minor = 0
16; GCN-NEXT:     amd_machine_version_stepping = 0
17; GCN-NEXT:     kernel_code_entry_byte_offset = 256
18; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
19; GCN-NEXT:     granulated_workitem_vgpr_count = 7
20; GCN-NEXT:     granulated_wavefront_sgpr_count = 4
21; GCN-NEXT:     priority = 0
22; GCN-NEXT:     float_mode = 240
23; GCN-NEXT:     priv = 0
24; GCN-NEXT:     enable_dx10_clamp = 1
25; GCN-NEXT:     debug_mode = 0
26; GCN-NEXT:     enable_ieee_mode = 1
27; GCN-NEXT:     enable_wgp_mode = 0
28; GCN-NEXT:     enable_mem_ordered = 0
29; GCN-NEXT:     enable_fwd_progress = 0
30; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
31; GCN-NEXT:     user_sgpr_count = 14
32; GCN-NEXT:     enable_trap_handler = 0
33; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
34; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
35; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
36; GCN-NEXT:     enable_sgpr_workgroup_info = 0
37; GCN-NEXT:     enable_vgpr_workitem_id = 2
38; GCN-NEXT:     enable_exception_msb = 0
39; GCN-NEXT:     granulated_lds_size = 0
40; GCN-NEXT:     enable_exception = 0
41; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
42; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
43; GCN-NEXT:     enable_sgpr_queue_ptr = 1
44; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
45; GCN-NEXT:     enable_sgpr_dispatch_id = 1
46; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
47; GCN-NEXT:     enable_sgpr_private_segment_size = 0
48; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
49; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
50; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
51; GCN-NEXT:     enable_wavefront_size32 = 0
52; GCN-NEXT:     enable_ordered_append_gds = 0
53; GCN-NEXT:     private_element_size = 1
54; GCN-NEXT:     is_ptr64 = 1
55; GCN-NEXT:     is_dynamic_callstack = 1
56; GCN-NEXT:     is_debug_enabled = 0
57; GCN-NEXT:     is_xnack_enabled = 0
58; GCN-NEXT:     workitem_private_segment_byte_size = 16384
59; GCN-NEXT:     workgroup_group_segment_byte_size = 0
60; GCN-NEXT:     gds_segment_byte_size = 0
61; GCN-NEXT:     kernarg_segment_byte_size = 64
62; GCN-NEXT:     workgroup_fbarrier_count = 0
63; GCN-NEXT:     wavefront_sgpr_count = 37
64; GCN-NEXT:     workitem_vgpr_count = 32
65; GCN-NEXT:     reserved_vgpr_first = 0
66; GCN-NEXT:     reserved_vgpr_count = 0
67; GCN-NEXT:     reserved_sgpr_first = 0
68; GCN-NEXT:     reserved_sgpr_count = 0
69; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
70; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
71; GCN-NEXT:     kernarg_segment_alignment = 4
72; GCN-NEXT:     group_segment_alignment = 4
73; GCN-NEXT:     private_segment_alignment = 4
74; GCN-NEXT:     wavefront_size = 6
75; GCN-NEXT:     call_convention = -1
76; GCN-NEXT:     runtime_loader_kernel_symbol = 0
77; GCN-NEXT:    .end_amd_kernel_code_t
78; GCN-NEXT:  ; %bb.0:
79; GCN-NEXT:    s_mov_b32 s32, 0
80; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
81; GCN-NEXT:    s_add_i32 s12, s12, s17
82; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
83; GCN-NEXT:    s_add_u32 s0, s0, s17
84; GCN-NEXT:    s_addc_u32 s1, s1, 0
85; GCN-NEXT:    s_mov_b32 s13, s15
86; GCN-NEXT:    s_mov_b32 s12, s14
87; GCN-NEXT:    s_getpc_b64 s[14:15]
88; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
89; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
90; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
91; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
92; GCN-NEXT:    s_add_u32 s8, s8, 8
93; GCN-NEXT:    s_addc_u32 s9, s9, 0
94; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
95; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
96; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
97; GCN-NEXT:    s_mov_b32 s14, s16
98; GCN-NEXT:    s_waitcnt lgkmcnt(0)
99; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
100; GCN-NEXT:    s_endpgm
101;
102; GISEL-LABEL: test_indirect_call_sgpr_ptr:
103; GISEL:         .amd_kernel_code_t
104; GISEL-NEXT:     amd_code_version_major = 1
105; GISEL-NEXT:     amd_code_version_minor = 2
106; GISEL-NEXT:     amd_machine_kind = 1
107; GISEL-NEXT:     amd_machine_version_major = 7
108; GISEL-NEXT:     amd_machine_version_minor = 0
109; GISEL-NEXT:     amd_machine_version_stepping = 0
110; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
111; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
112; GISEL-NEXT:     granulated_workitem_vgpr_count = 7
113; GISEL-NEXT:     granulated_wavefront_sgpr_count = 4
114; GISEL-NEXT:     priority = 0
115; GISEL-NEXT:     float_mode = 240
116; GISEL-NEXT:     priv = 0
117; GISEL-NEXT:     enable_dx10_clamp = 1
118; GISEL-NEXT:     debug_mode = 0
119; GISEL-NEXT:     enable_ieee_mode = 1
120; GISEL-NEXT:     enable_wgp_mode = 0
121; GISEL-NEXT:     enable_mem_ordered = 0
122; GISEL-NEXT:     enable_fwd_progress = 0
123; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
124; GISEL-NEXT:     user_sgpr_count = 14
125; GISEL-NEXT:     enable_trap_handler = 0
126; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
127; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
128; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
129; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
130; GISEL-NEXT:     enable_vgpr_workitem_id = 2
131; GISEL-NEXT:     enable_exception_msb = 0
132; GISEL-NEXT:     granulated_lds_size = 0
133; GISEL-NEXT:     enable_exception = 0
134; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
135; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
136; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
137; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
138; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
139; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
140; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
141; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
142; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
143; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
144; GISEL-NEXT:     enable_wavefront_size32 = 0
145; GISEL-NEXT:     enable_ordered_append_gds = 0
146; GISEL-NEXT:     private_element_size = 1
147; GISEL-NEXT:     is_ptr64 = 1
148; GISEL-NEXT:     is_dynamic_callstack = 1
149; GISEL-NEXT:     is_debug_enabled = 0
150; GISEL-NEXT:     is_xnack_enabled = 0
151; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
152; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
153; GISEL-NEXT:     gds_segment_byte_size = 0
154; GISEL-NEXT:     kernarg_segment_byte_size = 64
155; GISEL-NEXT:     workgroup_fbarrier_count = 0
156; GISEL-NEXT:     wavefront_sgpr_count = 37
157; GISEL-NEXT:     workitem_vgpr_count = 32
158; GISEL-NEXT:     reserved_vgpr_first = 0
159; GISEL-NEXT:     reserved_vgpr_count = 0
160; GISEL-NEXT:     reserved_sgpr_first = 0
161; GISEL-NEXT:     reserved_sgpr_count = 0
162; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
163; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
164; GISEL-NEXT:     kernarg_segment_alignment = 4
165; GISEL-NEXT:     group_segment_alignment = 4
166; GISEL-NEXT:     private_segment_alignment = 4
167; GISEL-NEXT:     wavefront_size = 6
168; GISEL-NEXT:     call_convention = -1
169; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
170; GISEL-NEXT:    .end_amd_kernel_code_t
171; GISEL-NEXT:  ; %bb.0:
172; GISEL-NEXT:    s_mov_b32 s32, 0
173; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
174; GISEL-NEXT:    s_add_i32 s12, s12, s17
175; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
176; GISEL-NEXT:    s_add_u32 s0, s0, s17
177; GISEL-NEXT:    s_addc_u32 s1, s1, 0
178; GISEL-NEXT:    s_mov_b32 s13, s15
179; GISEL-NEXT:    s_mov_b32 s12, s14
180; GISEL-NEXT:    s_getpc_b64 s[14:15]
181; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
182; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
183; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
184; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
185; GISEL-NEXT:    s_add_u32 s8, s8, 8
186; GISEL-NEXT:    s_addc_u32 s9, s9, 0
187; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
188; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
189; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
190; GISEL-NEXT:    s_mov_b32 s14, s16
191; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
192; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
193; GISEL-NEXT:    s_endpgm
  ; Fetch the callee address from the global, then call through it.
194  %fptr = load void()*, void()* addrspace(4)* @gv.fptr0
195  call void %fptr()
196  ret void
197}
198
; Kernel test: load a `void(i32)*` from @gv.fptr1 and call it with the constant
; argument i32 123. The unnamed i8 kernel argument is not used by the body.
; Both check blocks (SelectionDAG and GlobalISel) expect the call target in
; s[18:19] and the argument materialized as `v_mov_b32_e32 v0, 0x7b` after v31
; has been assembled from v0/v1/v2 and before s_swappc_b64.
199define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
200; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
201; GCN:         .amd_kernel_code_t
202; GCN-NEXT:     amd_code_version_major = 1
203; GCN-NEXT:     amd_code_version_minor = 2
204; GCN-NEXT:     amd_machine_kind = 1
205; GCN-NEXT:     amd_machine_version_major = 7
206; GCN-NEXT:     amd_machine_version_minor = 0
207; GCN-NEXT:     amd_machine_version_stepping = 0
208; GCN-NEXT:     kernel_code_entry_byte_offset = 256
209; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
210; GCN-NEXT:     granulated_workitem_vgpr_count = 7
211; GCN-NEXT:     granulated_wavefront_sgpr_count = 4
212; GCN-NEXT:     priority = 0
213; GCN-NEXT:     float_mode = 240
214; GCN-NEXT:     priv = 0
215; GCN-NEXT:     enable_dx10_clamp = 1
216; GCN-NEXT:     debug_mode = 0
217; GCN-NEXT:     enable_ieee_mode = 1
218; GCN-NEXT:     enable_wgp_mode = 0
219; GCN-NEXT:     enable_mem_ordered = 0
220; GCN-NEXT:     enable_fwd_progress = 0
221; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
222; GCN-NEXT:     user_sgpr_count = 14
223; GCN-NEXT:     enable_trap_handler = 0
224; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
225; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
226; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
227; GCN-NEXT:     enable_sgpr_workgroup_info = 0
228; GCN-NEXT:     enable_vgpr_workitem_id = 2
229; GCN-NEXT:     enable_exception_msb = 0
230; GCN-NEXT:     granulated_lds_size = 0
231; GCN-NEXT:     enable_exception = 0
232; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
233; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
234; GCN-NEXT:     enable_sgpr_queue_ptr = 1
235; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
236; GCN-NEXT:     enable_sgpr_dispatch_id = 1
237; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
238; GCN-NEXT:     enable_sgpr_private_segment_size = 0
239; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
240; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
241; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
242; GCN-NEXT:     enable_wavefront_size32 = 0
243; GCN-NEXT:     enable_ordered_append_gds = 0
244; GCN-NEXT:     private_element_size = 1
245; GCN-NEXT:     is_ptr64 = 1
246; GCN-NEXT:     is_dynamic_callstack = 1
247; GCN-NEXT:     is_debug_enabled = 0
248; GCN-NEXT:     is_xnack_enabled = 0
249; GCN-NEXT:     workitem_private_segment_byte_size = 16384
250; GCN-NEXT:     workgroup_group_segment_byte_size = 0
251; GCN-NEXT:     gds_segment_byte_size = 0
252; GCN-NEXT:     kernarg_segment_byte_size = 64
253; GCN-NEXT:     workgroup_fbarrier_count = 0
254; GCN-NEXT:     wavefront_sgpr_count = 37
255; GCN-NEXT:     workitem_vgpr_count = 32
256; GCN-NEXT:     reserved_vgpr_first = 0
257; GCN-NEXT:     reserved_vgpr_count = 0
258; GCN-NEXT:     reserved_sgpr_first = 0
259; GCN-NEXT:     reserved_sgpr_count = 0
260; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
261; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
262; GCN-NEXT:     kernarg_segment_alignment = 4
263; GCN-NEXT:     group_segment_alignment = 4
264; GCN-NEXT:     private_segment_alignment = 4
265; GCN-NEXT:     wavefront_size = 6
266; GCN-NEXT:     call_convention = -1
267; GCN-NEXT:     runtime_loader_kernel_symbol = 0
268; GCN-NEXT:    .end_amd_kernel_code_t
269; GCN-NEXT:  ; %bb.0:
270; GCN-NEXT:    s_mov_b32 s32, 0
271; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
272; GCN-NEXT:    s_add_i32 s12, s12, s17
273; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
274; GCN-NEXT:    s_add_u32 s0, s0, s17
275; GCN-NEXT:    s_addc_u32 s1, s1, 0
276; GCN-NEXT:    s_mov_b32 s13, s15
277; GCN-NEXT:    s_mov_b32 s12, s14
278; GCN-NEXT:    s_getpc_b64 s[14:15]
279; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
280; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
281; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
282; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
283; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
284; GCN-NEXT:    s_add_u32 s8, s8, 8
285; GCN-NEXT:    s_addc_u32 s9, s9, 0
286; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
287; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
288; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
289; GCN-NEXT:    s_mov_b32 s14, s16
290; GCN-NEXT:    s_waitcnt lgkmcnt(0)
291; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
292; GCN-NEXT:    s_endpgm
293;
294; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
295; GISEL:         .amd_kernel_code_t
296; GISEL-NEXT:     amd_code_version_major = 1
297; GISEL-NEXT:     amd_code_version_minor = 2
298; GISEL-NEXT:     amd_machine_kind = 1
299; GISEL-NEXT:     amd_machine_version_major = 7
300; GISEL-NEXT:     amd_machine_version_minor = 0
301; GISEL-NEXT:     amd_machine_version_stepping = 0
302; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
303; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
304; GISEL-NEXT:     granulated_workitem_vgpr_count = 7
305; GISEL-NEXT:     granulated_wavefront_sgpr_count = 4
306; GISEL-NEXT:     priority = 0
307; GISEL-NEXT:     float_mode = 240
308; GISEL-NEXT:     priv = 0
309; GISEL-NEXT:     enable_dx10_clamp = 1
310; GISEL-NEXT:     debug_mode = 0
311; GISEL-NEXT:     enable_ieee_mode = 1
312; GISEL-NEXT:     enable_wgp_mode = 0
313; GISEL-NEXT:     enable_mem_ordered = 0
314; GISEL-NEXT:     enable_fwd_progress = 0
315; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
316; GISEL-NEXT:     user_sgpr_count = 14
317; GISEL-NEXT:     enable_trap_handler = 0
318; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
319; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
320; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
321; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
322; GISEL-NEXT:     enable_vgpr_workitem_id = 2
323; GISEL-NEXT:     enable_exception_msb = 0
324; GISEL-NEXT:     granulated_lds_size = 0
325; GISEL-NEXT:     enable_exception = 0
326; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
327; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
328; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
329; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
330; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
331; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
332; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
333; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
334; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
335; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
336; GISEL-NEXT:     enable_wavefront_size32 = 0
337; GISEL-NEXT:     enable_ordered_append_gds = 0
338; GISEL-NEXT:     private_element_size = 1
339; GISEL-NEXT:     is_ptr64 = 1
340; GISEL-NEXT:     is_dynamic_callstack = 1
341; GISEL-NEXT:     is_debug_enabled = 0
342; GISEL-NEXT:     is_xnack_enabled = 0
343; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
344; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
345; GISEL-NEXT:     gds_segment_byte_size = 0
346; GISEL-NEXT:     kernarg_segment_byte_size = 64
347; GISEL-NEXT:     workgroup_fbarrier_count = 0
348; GISEL-NEXT:     wavefront_sgpr_count = 37
349; GISEL-NEXT:     workitem_vgpr_count = 32
350; GISEL-NEXT:     reserved_vgpr_first = 0
351; GISEL-NEXT:     reserved_vgpr_count = 0
352; GISEL-NEXT:     reserved_sgpr_first = 0
353; GISEL-NEXT:     reserved_sgpr_count = 0
354; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
355; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
356; GISEL-NEXT:     kernarg_segment_alignment = 4
357; GISEL-NEXT:     group_segment_alignment = 4
358; GISEL-NEXT:     private_segment_alignment = 4
359; GISEL-NEXT:     wavefront_size = 6
360; GISEL-NEXT:     call_convention = -1
361; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
362; GISEL-NEXT:    .end_amd_kernel_code_t
363; GISEL-NEXT:  ; %bb.0:
364; GISEL-NEXT:    s_mov_b32 s32, 0
365; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
366; GISEL-NEXT:    s_add_i32 s12, s12, s17
367; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
368; GISEL-NEXT:    s_add_u32 s0, s0, s17
369; GISEL-NEXT:    s_addc_u32 s1, s1, 0
370; GISEL-NEXT:    s_mov_b32 s13, s15
371; GISEL-NEXT:    s_mov_b32 s12, s14
372; GISEL-NEXT:    s_getpc_b64 s[14:15]
373; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
374; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
375; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
376; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
377; GISEL-NEXT:    s_add_u32 s8, s8, 8
378; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
379; GISEL-NEXT:    s_addc_u32 s9, s9, 0
380; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
381; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
382; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
383; GISEL-NEXT:    s_mov_b32 s14, s16
384; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
385; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
386; GISEL-NEXT:    s_endpgm
  ; Fetch the callee address from the global, then call it with 123 (0x7b).
387  %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
388  call void %fptr(i32 123)
389  ret void
390}
391
; Non-kernel function test: the call target arrives as the argument %fptr and
; is called with no arguments.
; Both check blocks expect:
;   * v40 to be spilled to the stack and used via v_writelane/v_readlane to
;     save and restore s33, s34-s44, s46-s49 and the return address s30/s31;
;   * incoming s4-s14 to be copied to s34-s44 before the loop and copied back
;     ahead of every call;
;   * a loop (.LBB2_1) that reads v0/v1 of the first active lane into s[16:17],
;     compares that against v[0:1], narrows exec with s_and_saveexec_b64,
;     calls through s_swappc_b64, then removes the handled lanes with
;     s_xor_b64 and repeats while exec is non-zero;
;   * exec to be restored from s[46:47] after the loop.
; The two blocks differ only in where the s30/s31 writelanes are scheduled.
392define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
393; GCN-LABEL: test_indirect_call_vgpr_ptr:
394; GCN:       ; %bb.0:
395; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
397; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
398; GCN-NEXT:    s_mov_b64 exec, s[16:17]
399; GCN-NEXT:    v_writelane_b32 v40, s33, 17
400; GCN-NEXT:    s_mov_b32 s33, s32
401; GCN-NEXT:    s_addk_i32 s32, 0x400
402; GCN-NEXT:    v_writelane_b32 v40, s34, 0
403; GCN-NEXT:    v_writelane_b32 v40, s35, 1
404; GCN-NEXT:    v_writelane_b32 v40, s36, 2
405; GCN-NEXT:    v_writelane_b32 v40, s37, 3
406; GCN-NEXT:    v_writelane_b32 v40, s38, 4
407; GCN-NEXT:    v_writelane_b32 v40, s39, 5
408; GCN-NEXT:    v_writelane_b32 v40, s40, 6
409; GCN-NEXT:    v_writelane_b32 v40, s41, 7
410; GCN-NEXT:    v_writelane_b32 v40, s42, 8
411; GCN-NEXT:    v_writelane_b32 v40, s43, 9
412; GCN-NEXT:    v_writelane_b32 v40, s44, 10
413; GCN-NEXT:    v_writelane_b32 v40, s46, 11
414; GCN-NEXT:    v_writelane_b32 v40, s47, 12
415; GCN-NEXT:    v_writelane_b32 v40, s48, 13
416; GCN-NEXT:    v_writelane_b32 v40, s49, 14
417; GCN-NEXT:    v_writelane_b32 v40, s30, 15
418; GCN-NEXT:    v_writelane_b32 v40, s31, 16
419; GCN-NEXT:    s_mov_b32 s42, s14
420; GCN-NEXT:    s_mov_b32 s43, s13
421; GCN-NEXT:    s_mov_b32 s44, s12
422; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
423; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
424; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
425; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
426; GCN-NEXT:    s_mov_b64 s[46:47], exec
427; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
428; GCN-NEXT:    v_readfirstlane_b32 s16, v0
429; GCN-NEXT:    v_readfirstlane_b32 s17, v1
430; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
431; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
432; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
433; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
434; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
435; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
436; GCN-NEXT:    s_mov_b32 s12, s44
437; GCN-NEXT:    s_mov_b32 s13, s43
438; GCN-NEXT:    s_mov_b32 s14, s42
439; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
440; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
441; GCN-NEXT:    ; implicit-def: $vgpr31
442; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
443; GCN-NEXT:    s_cbranch_execnz .LBB2_1
444; GCN-NEXT:  ; %bb.2:
445; GCN-NEXT:    s_mov_b64 exec, s[46:47]
446; GCN-NEXT:    v_readlane_b32 s4, v40, 15
447; GCN-NEXT:    v_readlane_b32 s5, v40, 16
448; GCN-NEXT:    v_readlane_b32 s49, v40, 14
449; GCN-NEXT:    v_readlane_b32 s48, v40, 13
450; GCN-NEXT:    v_readlane_b32 s47, v40, 12
451; GCN-NEXT:    v_readlane_b32 s46, v40, 11
452; GCN-NEXT:    v_readlane_b32 s44, v40, 10
453; GCN-NEXT:    v_readlane_b32 s43, v40, 9
454; GCN-NEXT:    v_readlane_b32 s42, v40, 8
455; GCN-NEXT:    v_readlane_b32 s41, v40, 7
456; GCN-NEXT:    v_readlane_b32 s40, v40, 6
457; GCN-NEXT:    v_readlane_b32 s39, v40, 5
458; GCN-NEXT:    v_readlane_b32 s38, v40, 4
459; GCN-NEXT:    v_readlane_b32 s37, v40, 3
460; GCN-NEXT:    v_readlane_b32 s36, v40, 2
461; GCN-NEXT:    v_readlane_b32 s35, v40, 1
462; GCN-NEXT:    v_readlane_b32 s34, v40, 0
463; GCN-NEXT:    s_addk_i32 s32, 0xfc00
464; GCN-NEXT:    v_readlane_b32 s33, v40, 17
465; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
466; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
467; GCN-NEXT:    s_mov_b64 exec, s[6:7]
468; GCN-NEXT:    s_waitcnt vmcnt(0)
469; GCN-NEXT:    s_setpc_b64 s[4:5]
470;
471; GISEL-LABEL: test_indirect_call_vgpr_ptr:
472; GISEL:       ; %bb.0:
473; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
475; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
476; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
477; GISEL-NEXT:    v_writelane_b32 v40, s33, 17
478; GISEL-NEXT:    s_mov_b32 s33, s32
479; GISEL-NEXT:    s_addk_i32 s32, 0x400
480; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
481; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
482; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
483; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
484; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
485; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
486; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
487; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
488; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
489; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
490; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
491; GISEL-NEXT:    v_writelane_b32 v40, s46, 11
492; GISEL-NEXT:    v_writelane_b32 v40, s47, 12
493; GISEL-NEXT:    v_writelane_b32 v40, s48, 13
494; GISEL-NEXT:    v_writelane_b32 v40, s49, 14
495; GISEL-NEXT:    s_mov_b32 s42, s14
496; GISEL-NEXT:    s_mov_b32 s43, s13
497; GISEL-NEXT:    s_mov_b32 s44, s12
498; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
499; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
500; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
501; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
502; GISEL-NEXT:    v_writelane_b32 v40, s30, 15
503; GISEL-NEXT:    v_writelane_b32 v40, s31, 16
504; GISEL-NEXT:    s_mov_b64 s[46:47], exec
505; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
506; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
507; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
508; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
509; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
510; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
511; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
512; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
513; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
514; GISEL-NEXT:    s_mov_b32 s12, s44
515; GISEL-NEXT:    s_mov_b32 s13, s43
516; GISEL-NEXT:    s_mov_b32 s14, s42
517; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
518; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
519; GISEL-NEXT:    ; implicit-def: $vgpr31
520; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
521; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
522; GISEL-NEXT:  ; %bb.2:
523; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
524; GISEL-NEXT:    v_readlane_b32 s4, v40, 15
525; GISEL-NEXT:    v_readlane_b32 s5, v40, 16
526; GISEL-NEXT:    v_readlane_b32 s49, v40, 14
527; GISEL-NEXT:    v_readlane_b32 s48, v40, 13
528; GISEL-NEXT:    v_readlane_b32 s47, v40, 12
529; GISEL-NEXT:    v_readlane_b32 s46, v40, 11
530; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
531; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
532; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
533; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
534; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
535; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
536; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
537; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
538; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
539; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
540; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
541; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
542; GISEL-NEXT:    v_readlane_b32 s33, v40, 17
543; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
544; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
545; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
546; GISEL-NEXT:    s_waitcnt vmcnt(0)
547; GISEL-NEXT:    s_setpc_b64 s[4:5]
  ; Call through the pointer that was passed in as an argument.
548  call void %fptr()
549  ret void
550}
551
; Non-kernel function test: the call target arrives as the argument %fptr and
; is called with the constant argument i32 123.
; Both check blocks expect the same v40 save/restore sequence, the same
; s4-s14 <-> s34-s44 copies and the same readfirstlane / compare /
; s_and_saveexec / s_swappc / s_xor loop (.LBB3_1) as the no-argument variant.
; They differ in how the argument reaches v0:
;   * the SelectionDAG block sets v2 = 0x7b before the loop and copies v2 to
;     v0 inside it (and marks $vgpr2 implicit-def after the call);
;   * the GlobalISel block moves 0x7b into v0 inside the loop, right after
;     s_and_saveexec_b64.
552define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
553; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
554; GCN:       ; %bb.0:
555; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
557; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
558; GCN-NEXT:    s_mov_b64 exec, s[16:17]
559; GCN-NEXT:    v_writelane_b32 v40, s33, 17
560; GCN-NEXT:    s_mov_b32 s33, s32
561; GCN-NEXT:    s_addk_i32 s32, 0x400
562; GCN-NEXT:    v_writelane_b32 v40, s34, 0
563; GCN-NEXT:    v_writelane_b32 v40, s35, 1
564; GCN-NEXT:    v_writelane_b32 v40, s36, 2
565; GCN-NEXT:    v_writelane_b32 v40, s37, 3
566; GCN-NEXT:    v_writelane_b32 v40, s38, 4
567; GCN-NEXT:    v_writelane_b32 v40, s39, 5
568; GCN-NEXT:    v_writelane_b32 v40, s40, 6
569; GCN-NEXT:    v_writelane_b32 v40, s41, 7
570; GCN-NEXT:    v_writelane_b32 v40, s42, 8
571; GCN-NEXT:    v_writelane_b32 v40, s43, 9
572; GCN-NEXT:    v_writelane_b32 v40, s44, 10
573; GCN-NEXT:    v_writelane_b32 v40, s46, 11
574; GCN-NEXT:    v_writelane_b32 v40, s47, 12
575; GCN-NEXT:    v_writelane_b32 v40, s48, 13
576; GCN-NEXT:    v_writelane_b32 v40, s49, 14
577; GCN-NEXT:    v_writelane_b32 v40, s30, 15
578; GCN-NEXT:    v_writelane_b32 v40, s31, 16
579; GCN-NEXT:    s_mov_b32 s42, s14
580; GCN-NEXT:    s_mov_b32 s43, s13
581; GCN-NEXT:    s_mov_b32 s44, s12
582; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
583; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
584; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
585; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
586; GCN-NEXT:    s_mov_b64 s[46:47], exec
587; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
588; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
589; GCN-NEXT:    v_readfirstlane_b32 s16, v0
590; GCN-NEXT:    v_readfirstlane_b32 s17, v1
591; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
592; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
593; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
594; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
595; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
596; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
597; GCN-NEXT:    s_mov_b32 s12, s44
598; GCN-NEXT:    s_mov_b32 s13, s43
599; GCN-NEXT:    s_mov_b32 s14, s42
600; GCN-NEXT:    v_mov_b32_e32 v0, v2
601; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
602; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
603; GCN-NEXT:    ; implicit-def: $vgpr31
604; GCN-NEXT:    ; implicit-def: $vgpr2
605; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
606; GCN-NEXT:    s_cbranch_execnz .LBB3_1
607; GCN-NEXT:  ; %bb.2:
608; GCN-NEXT:    s_mov_b64 exec, s[46:47]
609; GCN-NEXT:    v_readlane_b32 s4, v40, 15
610; GCN-NEXT:    v_readlane_b32 s5, v40, 16
611; GCN-NEXT:    v_readlane_b32 s49, v40, 14
612; GCN-NEXT:    v_readlane_b32 s48, v40, 13
613; GCN-NEXT:    v_readlane_b32 s47, v40, 12
614; GCN-NEXT:    v_readlane_b32 s46, v40, 11
615; GCN-NEXT:    v_readlane_b32 s44, v40, 10
616; GCN-NEXT:    v_readlane_b32 s43, v40, 9
617; GCN-NEXT:    v_readlane_b32 s42, v40, 8
618; GCN-NEXT:    v_readlane_b32 s41, v40, 7
619; GCN-NEXT:    v_readlane_b32 s40, v40, 6
620; GCN-NEXT:    v_readlane_b32 s39, v40, 5
621; GCN-NEXT:    v_readlane_b32 s38, v40, 4
622; GCN-NEXT:    v_readlane_b32 s37, v40, 3
623; GCN-NEXT:    v_readlane_b32 s36, v40, 2
624; GCN-NEXT:    v_readlane_b32 s35, v40, 1
625; GCN-NEXT:    v_readlane_b32 s34, v40, 0
626; GCN-NEXT:    s_addk_i32 s32, 0xfc00
627; GCN-NEXT:    v_readlane_b32 s33, v40, 17
628; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
629; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
630; GCN-NEXT:    s_mov_b64 exec, s[6:7]
631; GCN-NEXT:    s_waitcnt vmcnt(0)
632; GCN-NEXT:    s_setpc_b64 s[4:5]
633;
634; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
635; GISEL:       ; %bb.0:
636; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
638; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
639; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
640; GISEL-NEXT:    v_writelane_b32 v40, s33, 17
641; GISEL-NEXT:    s_mov_b32 s33, s32
642; GISEL-NEXT:    s_addk_i32 s32, 0x400
643; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
644; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
645; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
646; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
647; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
648; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
649; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
650; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
651; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
652; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
653; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
654; GISEL-NEXT:    v_writelane_b32 v40, s46, 11
655; GISEL-NEXT:    v_writelane_b32 v40, s47, 12
656; GISEL-NEXT:    v_writelane_b32 v40, s48, 13
657; GISEL-NEXT:    v_writelane_b32 v40, s49, 14
658; GISEL-NEXT:    s_mov_b32 s42, s14
659; GISEL-NEXT:    s_mov_b32 s43, s13
660; GISEL-NEXT:    s_mov_b32 s44, s12
661; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
662; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
663; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
664; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
665; GISEL-NEXT:    v_writelane_b32 v40, s30, 15
666; GISEL-NEXT:    v_writelane_b32 v40, s31, 16
667; GISEL-NEXT:    s_mov_b64 s[46:47], exec
668; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
669; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
670; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
671; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
672; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
673; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
674; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
675; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
676; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
677; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
678; GISEL-NEXT:    s_mov_b32 s12, s44
679; GISEL-NEXT:    s_mov_b32 s13, s43
680; GISEL-NEXT:    s_mov_b32 s14, s42
681; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
682; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
683; GISEL-NEXT:    ; implicit-def: $vgpr31
684; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
685; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
686; GISEL-NEXT:  ; %bb.2:
687; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
688; GISEL-NEXT:    v_readlane_b32 s4, v40, 15
689; GISEL-NEXT:    v_readlane_b32 s5, v40, 16
690; GISEL-NEXT:    v_readlane_b32 s49, v40, 14
691; GISEL-NEXT:    v_readlane_b32 s48, v40, 13
692; GISEL-NEXT:    v_readlane_b32 s47, v40, 12
693; GISEL-NEXT:    v_readlane_b32 s46, v40, 11
694; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
695; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
696; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
697; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
698; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
699; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
700; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
701; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
702; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
703; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
704; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
705; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
706; GISEL-NEXT:    v_readlane_b32 s33, v40, 17
707; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
708; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
709; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
710; GISEL-NEXT:    s_waitcnt vmcnt(0)
711; GISEL-NEXT:    s_setpc_b64 s[4:5]
  ; Call through the pointer that was passed in, with 123 (0x7b) as argument.
712  call void %fptr(i32 123)
713  ret void
714}
715
; Non-kernel function that calls through a function pointer received as an
; ordinary argument and returns the callee's i32 result plus one.
;
; The expected output for both RUN configurations wraps the call in a loop
; (.LBB4_1): v_readfirstlane_b32 extracts one pointer value from v[0:1],
; s_and_saveexec_b64 restricts exec to the lanes whose pointer compares equal
; to it, s_swappc_b64 performs the call, and s_xor_b64 removes those lanes from
; exec; s_cbranch_execnz repeats until no lanes remain. The callee's v0 is
; copied to v2 inside the loop so the v_add_i32 after the loop can consume it.
716define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
717; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
718; GCN:       ; %bb.0:
719; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
721; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
722; GCN-NEXT:    s_mov_b64 exec, s[16:17]
723; GCN-NEXT:    v_writelane_b32 v40, s33, 17
724; GCN-NEXT:    s_mov_b32 s33, s32
725; GCN-NEXT:    s_addk_i32 s32, 0x400
726; GCN-NEXT:    v_writelane_b32 v40, s34, 0
727; GCN-NEXT:    v_writelane_b32 v40, s35, 1
728; GCN-NEXT:    v_writelane_b32 v40, s36, 2
729; GCN-NEXT:    v_writelane_b32 v40, s37, 3
730; GCN-NEXT:    v_writelane_b32 v40, s38, 4
731; GCN-NEXT:    v_writelane_b32 v40, s39, 5
732; GCN-NEXT:    v_writelane_b32 v40, s40, 6
733; GCN-NEXT:    v_writelane_b32 v40, s41, 7
734; GCN-NEXT:    v_writelane_b32 v40, s42, 8
735; GCN-NEXT:    v_writelane_b32 v40, s43, 9
736; GCN-NEXT:    v_writelane_b32 v40, s44, 10
737; GCN-NEXT:    v_writelane_b32 v40, s46, 11
738; GCN-NEXT:    v_writelane_b32 v40, s47, 12
739; GCN-NEXT:    v_writelane_b32 v40, s48, 13
740; GCN-NEXT:    v_writelane_b32 v40, s49, 14
741; GCN-NEXT:    v_writelane_b32 v40, s30, 15
742; GCN-NEXT:    v_writelane_b32 v40, s31, 16
743; GCN-NEXT:    s_mov_b32 s42, s14
744; GCN-NEXT:    s_mov_b32 s43, s13
745; GCN-NEXT:    s_mov_b32 s44, s12
746; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
747; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
748; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
749; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
750; GCN-NEXT:    s_mov_b64 s[46:47], exec
751; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
752; GCN-NEXT:    v_readfirstlane_b32 s16, v0
753; GCN-NEXT:    v_readfirstlane_b32 s17, v1
754; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
755; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
756; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
757; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
758; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
759; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
760; GCN-NEXT:    s_mov_b32 s12, s44
761; GCN-NEXT:    s_mov_b32 s13, s43
762; GCN-NEXT:    s_mov_b32 s14, s42
763; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
764; GCN-NEXT:    v_mov_b32_e32 v2, v0
765; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
766; GCN-NEXT:    ; implicit-def: $vgpr31
767; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
768; GCN-NEXT:    s_cbranch_execnz .LBB4_1
769; GCN-NEXT:  ; %bb.2:
770; GCN-NEXT:    s_mov_b64 exec, s[46:47]
771; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
772; GCN-NEXT:    v_readlane_b32 s4, v40, 15
773; GCN-NEXT:    v_readlane_b32 s5, v40, 16
774; GCN-NEXT:    v_readlane_b32 s49, v40, 14
775; GCN-NEXT:    v_readlane_b32 s48, v40, 13
776; GCN-NEXT:    v_readlane_b32 s47, v40, 12
777; GCN-NEXT:    v_readlane_b32 s46, v40, 11
778; GCN-NEXT:    v_readlane_b32 s44, v40, 10
779; GCN-NEXT:    v_readlane_b32 s43, v40, 9
780; GCN-NEXT:    v_readlane_b32 s42, v40, 8
781; GCN-NEXT:    v_readlane_b32 s41, v40, 7
782; GCN-NEXT:    v_readlane_b32 s40, v40, 6
783; GCN-NEXT:    v_readlane_b32 s39, v40, 5
784; GCN-NEXT:    v_readlane_b32 s38, v40, 4
785; GCN-NEXT:    v_readlane_b32 s37, v40, 3
786; GCN-NEXT:    v_readlane_b32 s36, v40, 2
787; GCN-NEXT:    v_readlane_b32 s35, v40, 1
788; GCN-NEXT:    v_readlane_b32 s34, v40, 0
789; GCN-NEXT:    s_addk_i32 s32, 0xfc00
790; GCN-NEXT:    v_readlane_b32 s33, v40, 17
791; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
792; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
793; GCN-NEXT:    s_mov_b64 exec, s[6:7]
794; GCN-NEXT:    s_waitcnt vmcnt(0)
795; GCN-NEXT:    s_setpc_b64 s[4:5]
796;
797; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
798; GISEL:       ; %bb.0:
799; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
800; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
801; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
802; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
803; GISEL-NEXT:    v_writelane_b32 v40, s33, 17
804; GISEL-NEXT:    s_mov_b32 s33, s32
805; GISEL-NEXT:    s_addk_i32 s32, 0x400
806; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
807; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
808; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
809; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
810; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
811; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
812; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
813; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
814; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
815; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
816; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
817; GISEL-NEXT:    v_writelane_b32 v40, s46, 11
818; GISEL-NEXT:    v_writelane_b32 v40, s47, 12
819; GISEL-NEXT:    v_writelane_b32 v40, s48, 13
820; GISEL-NEXT:    v_writelane_b32 v40, s49, 14
821; GISEL-NEXT:    s_mov_b32 s42, s14
822; GISEL-NEXT:    s_mov_b32 s43, s13
823; GISEL-NEXT:    s_mov_b32 s44, s12
824; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
825; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
826; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
827; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
828; GISEL-NEXT:    v_writelane_b32 v40, s30, 15
829; GISEL-NEXT:    v_writelane_b32 v40, s31, 16
830; GISEL-NEXT:    s_mov_b64 s[46:47], exec
831; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
832; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
833; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
834; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
835; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
836; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
837; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
838; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
839; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
840; GISEL-NEXT:    s_mov_b32 s12, s44
841; GISEL-NEXT:    s_mov_b32 s13, s43
842; GISEL-NEXT:    s_mov_b32 s14, s42
843; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
844; GISEL-NEXT:    v_mov_b32_e32 v2, v0
845; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
846; GISEL-NEXT:    ; implicit-def: $vgpr31
847; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
848; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
849; GISEL-NEXT:  ; %bb.2:
850; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
851; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
852; GISEL-NEXT:    v_readlane_b32 s4, v40, 15
853; GISEL-NEXT:    v_readlane_b32 s5, v40, 16
854; GISEL-NEXT:    v_readlane_b32 s49, v40, 14
855; GISEL-NEXT:    v_readlane_b32 s48, v40, 13
856; GISEL-NEXT:    v_readlane_b32 s47, v40, 12
857; GISEL-NEXT:    v_readlane_b32 s46, v40, 11
858; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
859; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
860; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
861; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
862; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
863; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
864; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
865; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
866; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
867; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
868; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
869; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
870; GISEL-NEXT:    v_readlane_b32 s33, v40, 17
871; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
872; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
873; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
874; GISEL-NEXT:    s_waitcnt vmcnt(0)
875; GISEL-NEXT:    s_setpc_b64 s[4:5]
  ; Indirect call with no arguments; its i32 result feeds the add below.
876  %a = call i32 %fptr()
877  %b = add i32 %a, 1
878  ret i32 %b
879}
880
; Calls through a function pointer argument only on the %bb1 path, which is
; selected by the i1 %cond argument; %bb2 just returns.
;
; In the expected output the return address s[30:31] is written to lanes 17 and
; 18 of v40 only inside %bb.1 and read back in %bb.3, so the path that skips
; the call returns through the unmodified s[30:31]. The call itself sits in the
; .LBB5_2 loop, which iterates over the distinct pointer values held in v[0:1].
; The one difference between the two expected outputs is how bit 0 of %cond is
; tested: v_cmp_eq_u32 against 1 in the first, v_cmp_ne_u32 against 0 in the
; second.
881define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
882; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
883; GCN:       ; %bb.0: ; %bb0
884; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885; GCN-NEXT:    s_or_saveexec_b64 s[16:17], -1
886; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
887; GCN-NEXT:    s_mov_b64 exec, s[16:17]
888; GCN-NEXT:    v_writelane_b32 v40, s33, 19
889; GCN-NEXT:    s_mov_b32 s33, s32
890; GCN-NEXT:    s_addk_i32 s32, 0x400
891; GCN-NEXT:    v_writelane_b32 v40, s34, 0
892; GCN-NEXT:    v_writelane_b32 v40, s35, 1
893; GCN-NEXT:    v_writelane_b32 v40, s36, 2
894; GCN-NEXT:    v_writelane_b32 v40, s37, 3
895; GCN-NEXT:    v_writelane_b32 v40, s38, 4
896; GCN-NEXT:    v_writelane_b32 v40, s39, 5
897; GCN-NEXT:    v_writelane_b32 v40, s40, 6
898; GCN-NEXT:    v_writelane_b32 v40, s41, 7
899; GCN-NEXT:    v_writelane_b32 v40, s42, 8
900; GCN-NEXT:    v_writelane_b32 v40, s43, 9
901; GCN-NEXT:    v_writelane_b32 v40, s44, 10
902; GCN-NEXT:    v_writelane_b32 v40, s46, 11
903; GCN-NEXT:    v_writelane_b32 v40, s47, 12
904; GCN-NEXT:    v_writelane_b32 v40, s48, 13
905; GCN-NEXT:    v_writelane_b32 v40, s49, 14
906; GCN-NEXT:    v_writelane_b32 v40, s50, 15
907; GCN-NEXT:    v_writelane_b32 v40, s51, 16
908; GCN-NEXT:    s_mov_b32 s42, s14
909; GCN-NEXT:    s_mov_b32 s43, s13
910; GCN-NEXT:    s_mov_b32 s44, s12
911; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
912; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
913; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
914; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
915; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
916; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
917; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
918; GCN-NEXT:    s_cbranch_execz .LBB5_4
919; GCN-NEXT:  ; %bb.1: ; %bb1
920; GCN-NEXT:    v_writelane_b32 v40, s30, 17
921; GCN-NEXT:    v_writelane_b32 v40, s31, 18
922; GCN-NEXT:    s_mov_b64 s[48:49], exec
923; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
924; GCN-NEXT:    v_readfirstlane_b32 s16, v0
925; GCN-NEXT:    v_readfirstlane_b32 s17, v1
926; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
927; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
928; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
929; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
930; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
931; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
932; GCN-NEXT:    s_mov_b32 s12, s44
933; GCN-NEXT:    s_mov_b32 s13, s43
934; GCN-NEXT:    s_mov_b32 s14, s42
935; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
936; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
937; GCN-NEXT:    ; implicit-def: $vgpr31
938; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
939; GCN-NEXT:    s_cbranch_execnz .LBB5_2
940; GCN-NEXT:  ; %bb.3:
941; GCN-NEXT:    s_mov_b64 exec, s[48:49]
942; GCN-NEXT:    v_readlane_b32 s30, v40, 17
943; GCN-NEXT:    v_readlane_b32 s31, v40, 18
944; GCN-NEXT:  .LBB5_4: ; %bb2
945; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
946; GCN-NEXT:    v_readlane_b32 s51, v40, 16
947; GCN-NEXT:    v_readlane_b32 s50, v40, 15
948; GCN-NEXT:    v_readlane_b32 s49, v40, 14
949; GCN-NEXT:    v_readlane_b32 s48, v40, 13
950; GCN-NEXT:    v_readlane_b32 s47, v40, 12
951; GCN-NEXT:    v_readlane_b32 s46, v40, 11
952; GCN-NEXT:    v_readlane_b32 s44, v40, 10
953; GCN-NEXT:    v_readlane_b32 s43, v40, 9
954; GCN-NEXT:    v_readlane_b32 s42, v40, 8
955; GCN-NEXT:    v_readlane_b32 s41, v40, 7
956; GCN-NEXT:    v_readlane_b32 s40, v40, 6
957; GCN-NEXT:    v_readlane_b32 s39, v40, 5
958; GCN-NEXT:    v_readlane_b32 s38, v40, 4
959; GCN-NEXT:    v_readlane_b32 s37, v40, 3
960; GCN-NEXT:    v_readlane_b32 s36, v40, 2
961; GCN-NEXT:    v_readlane_b32 s35, v40, 1
962; GCN-NEXT:    v_readlane_b32 s34, v40, 0
963; GCN-NEXT:    s_addk_i32 s32, 0xfc00
964; GCN-NEXT:    v_readlane_b32 s33, v40, 19
965; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
966; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
967; GCN-NEXT:    s_mov_b64 exec, s[4:5]
968; GCN-NEXT:    s_waitcnt vmcnt(0)
969; GCN-NEXT:    s_setpc_b64 s[30:31]
970;
971; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
972; GISEL:       ; %bb.0: ; %bb0
973; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974; GISEL-NEXT:    s_or_saveexec_b64 s[16:17], -1
975; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
976; GISEL-NEXT:    s_mov_b64 exec, s[16:17]
977; GISEL-NEXT:    v_writelane_b32 v40, s33, 19
978; GISEL-NEXT:    s_mov_b32 s33, s32
979; GISEL-NEXT:    s_addk_i32 s32, 0x400
980; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
981; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
982; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
983; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
984; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
985; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
986; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
987; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
988; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
989; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
990; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
991; GISEL-NEXT:    v_writelane_b32 v40, s46, 11
992; GISEL-NEXT:    v_writelane_b32 v40, s47, 12
993; GISEL-NEXT:    v_writelane_b32 v40, s48, 13
994; GISEL-NEXT:    v_writelane_b32 v40, s49, 14
995; GISEL-NEXT:    v_writelane_b32 v40, s50, 15
996; GISEL-NEXT:    v_writelane_b32 v40, s51, 16
997; GISEL-NEXT:    s_mov_b32 s42, s14
998; GISEL-NEXT:    s_mov_b32 s43, s13
999; GISEL-NEXT:    s_mov_b32 s44, s12
1000; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
1001; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
1002; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
1003; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
1004; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
1005; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1006; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
1007; GISEL-NEXT:    s_cbranch_execz .LBB5_4
1008; GISEL-NEXT:  ; %bb.1: ; %bb1
1009; GISEL-NEXT:    v_writelane_b32 v40, s30, 17
1010; GISEL-NEXT:    v_writelane_b32 v40, s31, 18
1011; GISEL-NEXT:    s_mov_b64 s[48:49], exec
1012; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
1013; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
1014; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
1015; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
1016; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
1017; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
1018; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
1019; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
1020; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
1021; GISEL-NEXT:    s_mov_b32 s12, s44
1022; GISEL-NEXT:    s_mov_b32 s13, s43
1023; GISEL-NEXT:    s_mov_b32 s14, s42
1024; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1025; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
1026; GISEL-NEXT:    ; implicit-def: $vgpr31
1027; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
1028; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
1029; GISEL-NEXT:  ; %bb.3:
1030; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
1031; GISEL-NEXT:    v_readlane_b32 s30, v40, 17
1032; GISEL-NEXT:    v_readlane_b32 s31, v40, 18
1033; GISEL-NEXT:  .LBB5_4: ; %bb2
1034; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
1035; GISEL-NEXT:    v_readlane_b32 s51, v40, 16
1036; GISEL-NEXT:    v_readlane_b32 s50, v40, 15
1037; GISEL-NEXT:    v_readlane_b32 s49, v40, 14
1038; GISEL-NEXT:    v_readlane_b32 s48, v40, 13
1039; GISEL-NEXT:    v_readlane_b32 s47, v40, 12
1040; GISEL-NEXT:    v_readlane_b32 s46, v40, 11
1041; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
1042; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
1043; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
1044; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
1045; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
1046; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
1047; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
1048; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
1049; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
1050; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
1051; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
1052; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1053; GISEL-NEXT:    v_readlane_b32 s33, v40, 19
1054; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1055; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1056; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1057; GISEL-NEXT:    s_waitcnt vmcnt(0)
1058; GISEL-NEXT:    s_setpc_b64 s[30:31]
; Entry block: choose between the calling path (%bb1) and the direct return (%bb2).
1059bb0:
1060  br i1 %cond, label %bb1, label %bb2
1061
; Reached only when %cond is true: the indirect call under test.
1062bb1:
1063  call void %fptr()
1064  br label %bb2
1065
; Join point for both paths.
1066bb2:
1067  ret void
1068}
1069
; Calls through a function pointer argument using the amdgpu_gfx calling
; convention, passing the constant 123 as an `inreg` i32 argument.
;
; In the expected output the constant is materialised once, before the loop,
; with s_movk_i32 s4, 0x7b (0x7b == 123). s34 through s63 are saved to lanes
; 0-29 of v40 and restored after the loop, and s33 uses lane 30. The incoming
; return address s[30:31] is copied to s[6:7] up front and the function returns
; via s_setpc_b64 s[6:7]. The two expected outputs differ only in which SGPR
; pairs (s[10:11] / s[12:13]) hold the call target and the saved exec mask
; inside the .LBB6_1 loop.
1070define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
1071; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1072; GCN:       ; %bb.0:
1073; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1075; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1076; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1077; GCN-NEXT:    v_writelane_b32 v40, s33, 30
1078; GCN-NEXT:    s_mov_b32 s33, s32
1079; GCN-NEXT:    s_addk_i32 s32, 0x400
1080; GCN-NEXT:    v_writelane_b32 v40, s34, 0
1081; GCN-NEXT:    v_writelane_b32 v40, s35, 1
1082; GCN-NEXT:    v_writelane_b32 v40, s36, 2
1083; GCN-NEXT:    v_writelane_b32 v40, s37, 3
1084; GCN-NEXT:    v_writelane_b32 v40, s38, 4
1085; GCN-NEXT:    v_writelane_b32 v40, s39, 5
1086; GCN-NEXT:    v_writelane_b32 v40, s40, 6
1087; GCN-NEXT:    v_writelane_b32 v40, s41, 7
1088; GCN-NEXT:    v_writelane_b32 v40, s42, 8
1089; GCN-NEXT:    v_writelane_b32 v40, s43, 9
1090; GCN-NEXT:    v_writelane_b32 v40, s44, 10
1091; GCN-NEXT:    v_writelane_b32 v40, s45, 11
1092; GCN-NEXT:    v_writelane_b32 v40, s46, 12
1093; GCN-NEXT:    v_writelane_b32 v40, s47, 13
1094; GCN-NEXT:    v_writelane_b32 v40, s48, 14
1095; GCN-NEXT:    v_writelane_b32 v40, s49, 15
1096; GCN-NEXT:    v_writelane_b32 v40, s50, 16
1097; GCN-NEXT:    v_writelane_b32 v40, s51, 17
1098; GCN-NEXT:    v_writelane_b32 v40, s52, 18
1099; GCN-NEXT:    v_writelane_b32 v40, s53, 19
1100; GCN-NEXT:    v_writelane_b32 v40, s54, 20
1101; GCN-NEXT:    v_writelane_b32 v40, s55, 21
1102; GCN-NEXT:    v_writelane_b32 v40, s56, 22
1103; GCN-NEXT:    v_writelane_b32 v40, s57, 23
1104; GCN-NEXT:    v_writelane_b32 v40, s58, 24
1105; GCN-NEXT:    v_writelane_b32 v40, s59, 25
1106; GCN-NEXT:    v_writelane_b32 v40, s60, 26
1107; GCN-NEXT:    v_writelane_b32 v40, s61, 27
1108; GCN-NEXT:    v_writelane_b32 v40, s62, 28
1109; GCN-NEXT:    v_writelane_b32 v40, s63, 29
1110; GCN-NEXT:    s_mov_b64 s[6:7], s[30:31]
1111; GCN-NEXT:    s_mov_b64 s[8:9], exec
1112; GCN-NEXT:    s_movk_i32 s4, 0x7b
1113; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
1114; GCN-NEXT:    v_readfirstlane_b32 s12, v0
1115; GCN-NEXT:    v_readfirstlane_b32 s13, v1
1116; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1]
1117; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1118; GCN-NEXT:    s_swappc_b64 s[30:31], s[12:13]
1119; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1120; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
1121; GCN-NEXT:    s_cbranch_execnz .LBB6_1
1122; GCN-NEXT:  ; %bb.2:
1123; GCN-NEXT:    s_mov_b64 exec, s[8:9]
1124; GCN-NEXT:    v_readlane_b32 s63, v40, 29
1125; GCN-NEXT:    v_readlane_b32 s62, v40, 28
1126; GCN-NEXT:    v_readlane_b32 s61, v40, 27
1127; GCN-NEXT:    v_readlane_b32 s60, v40, 26
1128; GCN-NEXT:    v_readlane_b32 s59, v40, 25
1129; GCN-NEXT:    v_readlane_b32 s58, v40, 24
1130; GCN-NEXT:    v_readlane_b32 s57, v40, 23
1131; GCN-NEXT:    v_readlane_b32 s56, v40, 22
1132; GCN-NEXT:    v_readlane_b32 s55, v40, 21
1133; GCN-NEXT:    v_readlane_b32 s54, v40, 20
1134; GCN-NEXT:    v_readlane_b32 s53, v40, 19
1135; GCN-NEXT:    v_readlane_b32 s52, v40, 18
1136; GCN-NEXT:    v_readlane_b32 s51, v40, 17
1137; GCN-NEXT:    v_readlane_b32 s50, v40, 16
1138; GCN-NEXT:    v_readlane_b32 s49, v40, 15
1139; GCN-NEXT:    v_readlane_b32 s48, v40, 14
1140; GCN-NEXT:    v_readlane_b32 s47, v40, 13
1141; GCN-NEXT:    v_readlane_b32 s46, v40, 12
1142; GCN-NEXT:    v_readlane_b32 s45, v40, 11
1143; GCN-NEXT:    v_readlane_b32 s44, v40, 10
1144; GCN-NEXT:    v_readlane_b32 s43, v40, 9
1145; GCN-NEXT:    v_readlane_b32 s42, v40, 8
1146; GCN-NEXT:    v_readlane_b32 s41, v40, 7
1147; GCN-NEXT:    v_readlane_b32 s40, v40, 6
1148; GCN-NEXT:    v_readlane_b32 s39, v40, 5
1149; GCN-NEXT:    v_readlane_b32 s38, v40, 4
1150; GCN-NEXT:    v_readlane_b32 s37, v40, 3
1151; GCN-NEXT:    v_readlane_b32 s36, v40, 2
1152; GCN-NEXT:    v_readlane_b32 s35, v40, 1
1153; GCN-NEXT:    v_readlane_b32 s34, v40, 0
1154; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1155; GCN-NEXT:    v_readlane_b32 s33, v40, 30
1156; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1157; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1158; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1159; GCN-NEXT:    s_waitcnt vmcnt(0)
1160; GCN-NEXT:    s_setpc_b64 s[6:7]
1161;
1162; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
1163; GISEL:       ; %bb.0:
1164; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1166; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1167; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1168; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
1169; GISEL-NEXT:    s_mov_b32 s33, s32
1170; GISEL-NEXT:    s_addk_i32 s32, 0x400
1171; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
1172; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
1173; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
1174; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
1175; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
1176; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
1177; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
1178; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
1179; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
1180; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
1181; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
1182; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
1183; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
1184; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
1185; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
1186; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
1187; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
1188; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
1189; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
1190; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
1191; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
1192; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
1193; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
1194; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
1195; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
1196; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
1197; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
1198; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
1199; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
1200; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
1201; GISEL-NEXT:    s_mov_b64 s[6:7], s[30:31]
1202; GISEL-NEXT:    s_mov_b64 s[8:9], exec
1203; GISEL-NEXT:    s_movk_i32 s4, 0x7b
1204; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
1205; GISEL-NEXT:    v_readfirstlane_b32 s10, v0
1206; GISEL-NEXT:    v_readfirstlane_b32 s11, v1
1207; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
1208; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
1209; GISEL-NEXT:    s_swappc_b64 s[30:31], s[10:11]
1210; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
1211; GISEL-NEXT:    s_xor_b64 exec, exec, s[12:13]
1212; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
1213; GISEL-NEXT:  ; %bb.2:
1214; GISEL-NEXT:    s_mov_b64 exec, s[8:9]
1215; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
1216; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
1217; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
1218; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
1219; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
1220; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
1221; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
1222; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
1223; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
1224; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
1225; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
1226; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
1227; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
1228; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
1229; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
1230; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
1231; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
1232; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
1233; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
1234; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
1235; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
1236; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
1237; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
1238; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
1239; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
1240; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
1241; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
1242; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
1243; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
1244; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
1245; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1246; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
1247; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1248; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1249; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1250; GISEL-NEXT:    s_waitcnt vmcnt(0)
1251; GISEL-NEXT:    s_setpc_b64 s[6:7]
  ; Indirect amdgpu_gfx call; the inreg attribute on the constant is what this
  ; test exercises (it shows up as the s4 write in the expected output).
1252  call amdgpu_gfx void %fptr(i32 inreg 123)
1253  ret void
1254}
1255
1256define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) {
1257; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1258; GCN:       ; %bb.0:
1259; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1261; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1262; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1263; GCN-NEXT:    v_writelane_b32 v40, s33, 30
1264; GCN-NEXT:    s_mov_b32 s33, s32
1265; GCN-NEXT:    s_addk_i32 s32, 0x400
1266; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1267; GCN-NEXT:    v_writelane_b32 v40, s34, 0
1268; GCN-NEXT:    v_writelane_b32 v40, s35, 1
1269; GCN-NEXT:    v_writelane_b32 v40, s36, 2
1270; GCN-NEXT:    v_writelane_b32 v40, s37, 3
1271; GCN-NEXT:    v_writelane_b32 v40, s38, 4
1272; GCN-NEXT:    v_writelane_b32 v40, s39, 5
1273; GCN-NEXT:    v_writelane_b32 v40, s40, 6
1274; GCN-NEXT:    v_writelane_b32 v40, s41, 7
1275; GCN-NEXT:    v_writelane_b32 v40, s42, 8
1276; GCN-NEXT:    v_writelane_b32 v40, s43, 9
1277; GCN-NEXT:    v_writelane_b32 v40, s44, 10
1278; GCN-NEXT:    v_writelane_b32 v40, s45, 11
1279; GCN-NEXT:    v_writelane_b32 v40, s46, 12
1280; GCN-NEXT:    v_writelane_b32 v40, s47, 13
1281; GCN-NEXT:    v_writelane_b32 v40, s48, 14
1282; GCN-NEXT:    v_writelane_b32 v40, s49, 15
1283; GCN-NEXT:    v_writelane_b32 v40, s50, 16
1284; GCN-NEXT:    v_writelane_b32 v40, s51, 17
1285; GCN-NEXT:    v_writelane_b32 v40, s52, 18
1286; GCN-NEXT:    v_writelane_b32 v40, s53, 19
1287; GCN-NEXT:    v_writelane_b32 v40, s54, 20
1288; GCN-NEXT:    v_writelane_b32 v40, s55, 21
1289; GCN-NEXT:    v_writelane_b32 v40, s56, 22
1290; GCN-NEXT:    v_writelane_b32 v40, s57, 23
1291; GCN-NEXT:    v_writelane_b32 v40, s58, 24
1292; GCN-NEXT:    v_writelane_b32 v40, s59, 25
1293; GCN-NEXT:    v_writelane_b32 v40, s60, 26
1294; GCN-NEXT:    v_writelane_b32 v40, s61, 27
1295; GCN-NEXT:    v_writelane_b32 v40, s62, 28
1296; GCN-NEXT:    v_writelane_b32 v40, s63, 29
1297; GCN-NEXT:    s_mov_b64 s[4:5], s[30:31]
1298; GCN-NEXT:    v_mov_b32_e32 v41, v0
1299; GCN-NEXT:    s_mov_b64 s[6:7], exec
1300; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1301; GCN-NEXT:    v_readfirstlane_b32 s10, v1
1302; GCN-NEXT:    v_readfirstlane_b32 s11, v2
1303; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
1304; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1305; GCN-NEXT:    v_mov_b32_e32 v0, v41
1306; GCN-NEXT:    s_swappc_b64 s[30:31], s[10:11]
1307; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1308; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1309; GCN-NEXT:    s_cbranch_execnz .LBB7_1
1310; GCN-NEXT:  ; %bb.2:
1311; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1312; GCN-NEXT:    v_mov_b32_e32 v0, v41
1313; GCN-NEXT:    v_readlane_b32 s63, v40, 29
1314; GCN-NEXT:    v_readlane_b32 s62, v40, 28
1315; GCN-NEXT:    v_readlane_b32 s61, v40, 27
1316; GCN-NEXT:    v_readlane_b32 s60, v40, 26
1317; GCN-NEXT:    v_readlane_b32 s59, v40, 25
1318; GCN-NEXT:    v_readlane_b32 s58, v40, 24
1319; GCN-NEXT:    v_readlane_b32 s57, v40, 23
1320; GCN-NEXT:    v_readlane_b32 s56, v40, 22
1321; GCN-NEXT:    v_readlane_b32 s55, v40, 21
1322; GCN-NEXT:    v_readlane_b32 s54, v40, 20
1323; GCN-NEXT:    v_readlane_b32 s53, v40, 19
1324; GCN-NEXT:    v_readlane_b32 s52, v40, 18
1325; GCN-NEXT:    v_readlane_b32 s51, v40, 17
1326; GCN-NEXT:    v_readlane_b32 s50, v40, 16
1327; GCN-NEXT:    v_readlane_b32 s49, v40, 15
1328; GCN-NEXT:    v_readlane_b32 s48, v40, 14
1329; GCN-NEXT:    v_readlane_b32 s47, v40, 13
1330; GCN-NEXT:    v_readlane_b32 s46, v40, 12
1331; GCN-NEXT:    v_readlane_b32 s45, v40, 11
1332; GCN-NEXT:    v_readlane_b32 s44, v40, 10
1333; GCN-NEXT:    v_readlane_b32 s43, v40, 9
1334; GCN-NEXT:    v_readlane_b32 s42, v40, 8
1335; GCN-NEXT:    v_readlane_b32 s41, v40, 7
1336; GCN-NEXT:    v_readlane_b32 s40, v40, 6
1337; GCN-NEXT:    v_readlane_b32 s39, v40, 5
1338; GCN-NEXT:    v_readlane_b32 s38, v40, 4
1339; GCN-NEXT:    v_readlane_b32 s37, v40, 3
1340; GCN-NEXT:    v_readlane_b32 s36, v40, 2
1341; GCN-NEXT:    v_readlane_b32 s35, v40, 1
1342; GCN-NEXT:    v_readlane_b32 s34, v40, 0
1343; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1344; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1345; GCN-NEXT:    v_readlane_b32 s33, v40, 30
1346; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
1347; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1348; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1349; GCN-NEXT:    s_waitcnt vmcnt(0)
1350; GCN-NEXT:    s_setpc_b64 s[4:5]
1351;
1352; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
1353; GISEL:       ; %bb.0:
1354; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1356; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1357; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1358; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
1359; GISEL-NEXT:    s_mov_b32 s33, s32
1360; GISEL-NEXT:    s_addk_i32 s32, 0x400
1361; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
1362; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
1363; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
1364; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
1365; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
1366; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
1367; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
1368; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
1369; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
1370; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
1371; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
1372; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
1373; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
1374; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
1375; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
1376; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
1377; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
1378; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
1379; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
1380; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
1381; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
1382; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
1383; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
1384; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
1385; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
1386; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
1387; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
1388; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
1389; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
1390; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
1391; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
1392; GISEL-NEXT:    v_mov_b32_e32 v41, v0
1393; GISEL-NEXT:    s_mov_b64 s[4:5], s[30:31]
1394; GISEL-NEXT:    s_mov_b64 s[6:7], exec
1395; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
1396; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
1397; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
1398; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1399; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1400; GISEL-NEXT:    v_mov_b32_e32 v0, v41
1401; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1402; GISEL-NEXT:    ; implicit-def: $vgpr1_vgpr2
1403; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
1404; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
1405; GISEL-NEXT:  ; %bb.2:
1406; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1407; GISEL-NEXT:    v_mov_b32_e32 v0, v41
1408; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
1409; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
1410; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
1411; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
1412; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
1413; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
1414; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
1415; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
1416; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
1417; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
1418; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
1419; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
1420; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
1421; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
1422; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
1423; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
1424; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
1425; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
1426; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
1427; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
1428; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
1429; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
1430; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
1431; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
1432; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
1433; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
1434; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
1435; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
1436; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
1437; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
1438; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
1439; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1440; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
1441; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
1442; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1443; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1444; GISEL-NEXT:    s_waitcnt vmcnt(0)
1445; GISEL-NEXT:    s_setpc_b64 s[4:5]
1446  call amdgpu_gfx void %fptr(i32 %i)
1447  ret i32 %i
1448}
1449
1450; Use a variable inside a waterfall loop and use the return variable after the loop.
1451; TODO: The argument and return variable could be in the same physical register, but the register
1452; allocator is not able to do that because the return value clashes with the live range of an
1453; IMPLICIT_DEF of the argument.
1454define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) {
1455; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1456; GCN:       ; %bb.0:
1457; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1458; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1459; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1460; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1461; GCN-NEXT:    v_writelane_b32 v40, s33, 30
1462; GCN-NEXT:    s_mov_b32 s33, s32
1463; GCN-NEXT:    s_addk_i32 s32, 0x400
1464; GCN-NEXT:    v_writelane_b32 v40, s34, 0
1465; GCN-NEXT:    v_writelane_b32 v40, s35, 1
1466; GCN-NEXT:    v_writelane_b32 v40, s36, 2
1467; GCN-NEXT:    v_writelane_b32 v40, s37, 3
1468; GCN-NEXT:    v_writelane_b32 v40, s38, 4
1469; GCN-NEXT:    v_writelane_b32 v40, s39, 5
1470; GCN-NEXT:    v_writelane_b32 v40, s40, 6
1471; GCN-NEXT:    v_writelane_b32 v40, s41, 7
1472; GCN-NEXT:    v_writelane_b32 v40, s42, 8
1473; GCN-NEXT:    v_writelane_b32 v40, s43, 9
1474; GCN-NEXT:    v_writelane_b32 v40, s44, 10
1475; GCN-NEXT:    v_writelane_b32 v40, s45, 11
1476; GCN-NEXT:    v_writelane_b32 v40, s46, 12
1477; GCN-NEXT:    v_writelane_b32 v40, s47, 13
1478; GCN-NEXT:    v_writelane_b32 v40, s48, 14
1479; GCN-NEXT:    v_writelane_b32 v40, s49, 15
1480; GCN-NEXT:    v_writelane_b32 v40, s50, 16
1481; GCN-NEXT:    v_writelane_b32 v40, s51, 17
1482; GCN-NEXT:    v_writelane_b32 v40, s52, 18
1483; GCN-NEXT:    v_writelane_b32 v40, s53, 19
1484; GCN-NEXT:    v_writelane_b32 v40, s54, 20
1485; GCN-NEXT:    v_writelane_b32 v40, s55, 21
1486; GCN-NEXT:    v_writelane_b32 v40, s56, 22
1487; GCN-NEXT:    v_writelane_b32 v40, s57, 23
1488; GCN-NEXT:    v_writelane_b32 v40, s58, 24
1489; GCN-NEXT:    v_writelane_b32 v40, s59, 25
1490; GCN-NEXT:    v_writelane_b32 v40, s60, 26
1491; GCN-NEXT:    v_writelane_b32 v40, s61, 27
1492; GCN-NEXT:    v_writelane_b32 v40, s62, 28
1493; GCN-NEXT:    v_writelane_b32 v40, s63, 29
1494; GCN-NEXT:    s_mov_b64 s[4:5], s[30:31]
1495; GCN-NEXT:    s_mov_b64 s[6:7], exec
1496; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1497; GCN-NEXT:    v_readfirstlane_b32 s10, v1
1498; GCN-NEXT:    v_readfirstlane_b32 s11, v2
1499; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
1500; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1501; GCN-NEXT:    s_swappc_b64 s[30:31], s[10:11]
1502; GCN-NEXT:    v_mov_b32_e32 v3, v0
1503; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
1504; GCN-NEXT:    ; implicit-def: $vgpr0
1505; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1506; GCN-NEXT:    s_cbranch_execnz .LBB8_1
1507; GCN-NEXT:  ; %bb.2:
1508; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1509; GCN-NEXT:    v_mov_b32_e32 v0, v3
1510; GCN-NEXT:    v_readlane_b32 s63, v40, 29
1511; GCN-NEXT:    v_readlane_b32 s62, v40, 28
1512; GCN-NEXT:    v_readlane_b32 s61, v40, 27
1513; GCN-NEXT:    v_readlane_b32 s60, v40, 26
1514; GCN-NEXT:    v_readlane_b32 s59, v40, 25
1515; GCN-NEXT:    v_readlane_b32 s58, v40, 24
1516; GCN-NEXT:    v_readlane_b32 s57, v40, 23
1517; GCN-NEXT:    v_readlane_b32 s56, v40, 22
1518; GCN-NEXT:    v_readlane_b32 s55, v40, 21
1519; GCN-NEXT:    v_readlane_b32 s54, v40, 20
1520; GCN-NEXT:    v_readlane_b32 s53, v40, 19
1521; GCN-NEXT:    v_readlane_b32 s52, v40, 18
1522; GCN-NEXT:    v_readlane_b32 s51, v40, 17
1523; GCN-NEXT:    v_readlane_b32 s50, v40, 16
1524; GCN-NEXT:    v_readlane_b32 s49, v40, 15
1525; GCN-NEXT:    v_readlane_b32 s48, v40, 14
1526; GCN-NEXT:    v_readlane_b32 s47, v40, 13
1527; GCN-NEXT:    v_readlane_b32 s46, v40, 12
1528; GCN-NEXT:    v_readlane_b32 s45, v40, 11
1529; GCN-NEXT:    v_readlane_b32 s44, v40, 10
1530; GCN-NEXT:    v_readlane_b32 s43, v40, 9
1531; GCN-NEXT:    v_readlane_b32 s42, v40, 8
1532; GCN-NEXT:    v_readlane_b32 s41, v40, 7
1533; GCN-NEXT:    v_readlane_b32 s40, v40, 6
1534; GCN-NEXT:    v_readlane_b32 s39, v40, 5
1535; GCN-NEXT:    v_readlane_b32 s38, v40, 4
1536; GCN-NEXT:    v_readlane_b32 s37, v40, 3
1537; GCN-NEXT:    v_readlane_b32 s36, v40, 2
1538; GCN-NEXT:    v_readlane_b32 s35, v40, 1
1539; GCN-NEXT:    v_readlane_b32 s34, v40, 0
1540; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1541; GCN-NEXT:    v_readlane_b32 s33, v40, 30
1542; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
1543; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1544; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1545; GCN-NEXT:    s_waitcnt vmcnt(0)
1546; GCN-NEXT:    s_setpc_b64 s[4:5]
1547;
1548; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
1549; GISEL:       ; %bb.0:
1550; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1552; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1553; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1554; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
1555; GISEL-NEXT:    s_mov_b32 s33, s32
1556; GISEL-NEXT:    s_addk_i32 s32, 0x400
1557; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
1558; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
1559; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
1560; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
1561; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
1562; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
1563; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
1564; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
1565; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
1566; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
1567; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
1568; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
1569; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
1570; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
1571; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
1572; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
1573; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
1574; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
1575; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
1576; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
1577; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
1578; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
1579; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
1580; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
1581; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
1582; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
1583; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
1584; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
1585; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
1586; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
1587; GISEL-NEXT:    s_mov_b64 s[4:5], s[30:31]
1588; GISEL-NEXT:    s_mov_b64 s[6:7], exec
1589; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1590; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
1591; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
1592; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
1593; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1594; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1595; GISEL-NEXT:    v_mov_b32_e32 v3, v0
1596; GISEL-NEXT:    ; implicit-def: $vgpr1_vgpr2
1597; GISEL-NEXT:    ; implicit-def: $vgpr0
1598; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
1599; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
1600; GISEL-NEXT:  ; %bb.2:
1601; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1602; GISEL-NEXT:    v_mov_b32_e32 v0, v3
1603; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
1604; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
1605; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
1606; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
1607; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
1608; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
1609; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
1610; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
1611; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
1612; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
1613; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
1614; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
1615; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
1616; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
1617; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
1618; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
1619; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
1620; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
1621; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
1622; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
1623; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
1624; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
1625; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
1626; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
1627; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
1628; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
1629; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
1630; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
1631; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
1632; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
1633; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1634; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
1635; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
1636; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1637; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1638; GISEL-NEXT:    s_waitcnt vmcnt(0)
1639; GISEL-NEXT:    s_setpc_b64 s[4:5]
  ; Indirect call through %fptr, passing %i and returning the callee's result.
  ; In the expected output above the pointer lives in v[1:2], so each loop
  ; iteration picks one pointer value with v_readfirstlane_b32, limits exec to
  ; the lanes whose pointer compares equal, calls it with s_swappc_b64, and
  ; branches back while any lanes are still pending (s_cbranch_execnz).
  ; The result is copied from v0 to v3 inside the loop and moved back to v0
  ; after exec is restored; presumably this is the extra copy the TODO above
  ; the function describes.
1640  %ret = call amdgpu_gfx i32 %fptr(i32 %i)
1641  ret i32 %ret
1642}
1643
1644; Calling a vgpr can never be a tail call.
1645define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
1646; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
1647; GCN:       ; %bb.0:
1648; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
1650; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1651; GCN-NEXT:    s_mov_b64 exec, s[4:5]
1652; GCN-NEXT:    v_writelane_b32 v40, s33, 30
1653; GCN-NEXT:    s_mov_b32 s33, s32
1654; GCN-NEXT:    s_addk_i32 s32, 0x400
1655; GCN-NEXT:    v_writelane_b32 v40, s34, 0
1656; GCN-NEXT:    v_writelane_b32 v40, s35, 1
1657; GCN-NEXT:    v_writelane_b32 v40, s36, 2
1658; GCN-NEXT:    v_writelane_b32 v40, s37, 3
1659; GCN-NEXT:    v_writelane_b32 v40, s38, 4
1660; GCN-NEXT:    v_writelane_b32 v40, s39, 5
1661; GCN-NEXT:    v_writelane_b32 v40, s40, 6
1662; GCN-NEXT:    v_writelane_b32 v40, s41, 7
1663; GCN-NEXT:    v_writelane_b32 v40, s42, 8
1664; GCN-NEXT:    v_writelane_b32 v40, s43, 9
1665; GCN-NEXT:    v_writelane_b32 v40, s44, 10
1666; GCN-NEXT:    v_writelane_b32 v40, s45, 11
1667; GCN-NEXT:    v_writelane_b32 v40, s46, 12
1668; GCN-NEXT:    v_writelane_b32 v40, s47, 13
1669; GCN-NEXT:    v_writelane_b32 v40, s48, 14
1670; GCN-NEXT:    v_writelane_b32 v40, s49, 15
1671; GCN-NEXT:    v_writelane_b32 v40, s50, 16
1672; GCN-NEXT:    v_writelane_b32 v40, s51, 17
1673; GCN-NEXT:    v_writelane_b32 v40, s52, 18
1674; GCN-NEXT:    v_writelane_b32 v40, s53, 19
1675; GCN-NEXT:    v_writelane_b32 v40, s54, 20
1676; GCN-NEXT:    v_writelane_b32 v40, s55, 21
1677; GCN-NEXT:    v_writelane_b32 v40, s56, 22
1678; GCN-NEXT:    v_writelane_b32 v40, s57, 23
1679; GCN-NEXT:    v_writelane_b32 v40, s58, 24
1680; GCN-NEXT:    v_writelane_b32 v40, s59, 25
1681; GCN-NEXT:    v_writelane_b32 v40, s60, 26
1682; GCN-NEXT:    v_writelane_b32 v40, s61, 27
1683; GCN-NEXT:    v_writelane_b32 v40, s62, 28
1684; GCN-NEXT:    v_writelane_b32 v40, s63, 29
1685; GCN-NEXT:    s_mov_b64 s[4:5], s[30:31]
1686; GCN-NEXT:    s_mov_b64 s[6:7], exec
1687; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1688; GCN-NEXT:    v_readfirstlane_b32 s10, v0
1689; GCN-NEXT:    v_readfirstlane_b32 s11, v1
1690; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
1691; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
1692; GCN-NEXT:    s_swappc_b64 s[30:31], s[10:11]
1693; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1694; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
1695; GCN-NEXT:    s_cbranch_execnz .LBB9_1
1696; GCN-NEXT:  ; %bb.2:
1697; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1698; GCN-NEXT:    v_readlane_b32 s63, v40, 29
1699; GCN-NEXT:    v_readlane_b32 s62, v40, 28
1700; GCN-NEXT:    v_readlane_b32 s61, v40, 27
1701; GCN-NEXT:    v_readlane_b32 s60, v40, 26
1702; GCN-NEXT:    v_readlane_b32 s59, v40, 25
1703; GCN-NEXT:    v_readlane_b32 s58, v40, 24
1704; GCN-NEXT:    v_readlane_b32 s57, v40, 23
1705; GCN-NEXT:    v_readlane_b32 s56, v40, 22
1706; GCN-NEXT:    v_readlane_b32 s55, v40, 21
1707; GCN-NEXT:    v_readlane_b32 s54, v40, 20
1708; GCN-NEXT:    v_readlane_b32 s53, v40, 19
1709; GCN-NEXT:    v_readlane_b32 s52, v40, 18
1710; GCN-NEXT:    v_readlane_b32 s51, v40, 17
1711; GCN-NEXT:    v_readlane_b32 s50, v40, 16
1712; GCN-NEXT:    v_readlane_b32 s49, v40, 15
1713; GCN-NEXT:    v_readlane_b32 s48, v40, 14
1714; GCN-NEXT:    v_readlane_b32 s47, v40, 13
1715; GCN-NEXT:    v_readlane_b32 s46, v40, 12
1716; GCN-NEXT:    v_readlane_b32 s45, v40, 11
1717; GCN-NEXT:    v_readlane_b32 s44, v40, 10
1718; GCN-NEXT:    v_readlane_b32 s43, v40, 9
1719; GCN-NEXT:    v_readlane_b32 s42, v40, 8
1720; GCN-NEXT:    v_readlane_b32 s41, v40, 7
1721; GCN-NEXT:    v_readlane_b32 s40, v40, 6
1722; GCN-NEXT:    v_readlane_b32 s39, v40, 5
1723; GCN-NEXT:    v_readlane_b32 s38, v40, 4
1724; GCN-NEXT:    v_readlane_b32 s37, v40, 3
1725; GCN-NEXT:    v_readlane_b32 s36, v40, 2
1726; GCN-NEXT:    v_readlane_b32 s35, v40, 1
1727; GCN-NEXT:    v_readlane_b32 s34, v40, 0
1728; GCN-NEXT:    s_addk_i32 s32, 0xfc00
1729; GCN-NEXT:    v_readlane_b32 s33, v40, 30
1730; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
1731; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1732; GCN-NEXT:    s_mov_b64 exec, s[6:7]
1733; GCN-NEXT:    s_waitcnt vmcnt(0)
1734; GCN-NEXT:    s_setpc_b64 s[4:5]
1735;
1736; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
1737; GISEL:       ; %bb.0:
1738; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1739; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
1740; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1741; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
1742; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
1743; GISEL-NEXT:    s_mov_b32 s33, s32
1744; GISEL-NEXT:    s_addk_i32 s32, 0x400
1745; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
1746; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
1747; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
1748; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
1749; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
1750; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
1751; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
1752; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
1753; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
1754; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
1755; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
1756; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
1757; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
1758; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
1759; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
1760; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
1761; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
1762; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
1763; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
1764; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
1765; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
1766; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
1767; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
1768; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
1769; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
1770; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
1771; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
1772; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
1773; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
1774; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
1775; GISEL-NEXT:    s_mov_b64 s[4:5], s[30:31]
1776; GISEL-NEXT:    s_mov_b64 s[6:7], exec
1777; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1778; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
1779; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
1780; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
1781; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
1782; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1783; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
1784; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
1785; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
1786; GISEL-NEXT:  ; %bb.2:
1787; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1788; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
1789; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
1790; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
1791; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
1792; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
1793; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
1794; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
1795; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
1796; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
1797; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
1798; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
1799; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
1800; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
1801; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
1802; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
1803; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
1804; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
1805; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
1806; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
1807; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
1808; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
1809; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
1810; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
1811; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
1812; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
1813; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
1814; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
1815; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
1816; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
1817; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
1818; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
1819; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
1820; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
1821; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
1822; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
1823; GISEL-NEXT:    s_waitcnt vmcnt(0)
1824; GISEL-NEXT:    s_setpc_b64 s[4:5]
  ; The call is marked `tail`, but the expected output above still performs an
  ; ordinary s_swappc_b64 call inside the readfirstlane/exec-masking loop over
  ; the pointer in v[0:1], then restores the saved SGPRs and returns with
  ; s_setpc_b64. In other words, it must not be lowered as a tail call.
1825  tail call amdgpu_gfx void %fptr()
1826  ret void
1827}
1828