1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s 4 5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4 7 8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { 9; GCN-LABEL: test_indirect_call_sgpr_ptr: 10; GCN: .amd_kernel_code_t 11; GCN-NEXT: amd_code_version_major = 1 12; GCN-NEXT: amd_code_version_minor = 2 13; GCN-NEXT: amd_machine_kind = 1 14; GCN-NEXT: amd_machine_version_major = 7 15; GCN-NEXT: amd_machine_version_minor = 0 16; GCN-NEXT: amd_machine_version_stepping = 0 17; GCN-NEXT: kernel_code_entry_byte_offset = 256 18; GCN-NEXT: kernel_code_prefetch_byte_size = 0 19; GCN-NEXT: granulated_workitem_vgpr_count = 10 20; GCN-NEXT: granulated_wavefront_sgpr_count = 8 21; GCN-NEXT: priority = 0 22; GCN-NEXT: float_mode = 240 23; GCN-NEXT: priv = 0 24; GCN-NEXT: enable_dx10_clamp = 1 25; GCN-NEXT: debug_mode = 0 26; GCN-NEXT: enable_ieee_mode = 1 27; GCN-NEXT: enable_wgp_mode = 0 28; GCN-NEXT: enable_mem_ordered = 0 29; GCN-NEXT: enable_fwd_progress = 0 30; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 31; GCN-NEXT: user_sgpr_count = 14 32; GCN-NEXT: enable_trap_handler = 0 33; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 34; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 35; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 36; GCN-NEXT: enable_sgpr_workgroup_info = 0 37; GCN-NEXT: enable_vgpr_workitem_id = 2 38; GCN-NEXT: enable_exception_msb = 0 39; GCN-NEXT: granulated_lds_size = 0 40; GCN-NEXT: enable_exception = 0 41; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 42; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 43; GCN-NEXT: enable_sgpr_queue_ptr 
= 1 44; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 45; GCN-NEXT: enable_sgpr_dispatch_id = 1 46; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 47; GCN-NEXT: enable_sgpr_private_segment_size = 0 48; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 49; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 50; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 51; GCN-NEXT: enable_wavefront_size32 = 0 52; GCN-NEXT: enable_ordered_append_gds = 0 53; GCN-NEXT: private_element_size = 1 54; GCN-NEXT: is_ptr64 = 1 55; GCN-NEXT: is_dynamic_callstack = 1 56; GCN-NEXT: is_debug_enabled = 0 57; GCN-NEXT: is_xnack_enabled = 0 58; GCN-NEXT: workitem_private_segment_byte_size = 16384 59; GCN-NEXT: workgroup_group_segment_byte_size = 0 60; GCN-NEXT: gds_segment_byte_size = 0 61; GCN-NEXT: kernarg_segment_byte_size = 64 62; GCN-NEXT: workgroup_fbarrier_count = 0 63; GCN-NEXT: wavefront_sgpr_count = 68 64; GCN-NEXT: workitem_vgpr_count = 42 65; GCN-NEXT: reserved_vgpr_first = 0 66; GCN-NEXT: reserved_vgpr_count = 0 67; GCN-NEXT: reserved_sgpr_first = 0 68; GCN-NEXT: reserved_sgpr_count = 0 69; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 70; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 71; GCN-NEXT: kernarg_segment_alignment = 4 72; GCN-NEXT: group_segment_alignment = 4 73; GCN-NEXT: private_segment_alignment = 4 74; GCN-NEXT: wavefront_size = 6 75; GCN-NEXT: call_convention = -1 76; GCN-NEXT: runtime_loader_kernel_symbol = 0 77; GCN-NEXT: .end_amd_kernel_code_t 78; GCN-NEXT: ; %bb.0: 79; GCN-NEXT: s_mov_b32 s32, 0 80; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 81; GCN-NEXT: s_add_i32 s12, s12, s17 82; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 83; GCN-NEXT: s_add_u32 s0, s0, s17 84; GCN-NEXT: s_addc_u32 s1, s1, 0 85; GCN-NEXT: s_mov_b32 s13, s15 86; GCN-NEXT: s_mov_b32 s12, s14 87; GCN-NEXT: s_getpc_b64 s[14:15] 88; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 89; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 90; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 91; 
GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 92; GCN-NEXT: s_add_u32 s8, s8, 8 93; GCN-NEXT: s_addc_u32 s9, s9, 0 94; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 95; GCN-NEXT: v_or_b32_e32 v0, v0, v1 96; GCN-NEXT: v_or_b32_e32 v31, v0, v2 97; GCN-NEXT: s_mov_b32 s14, s16 98; GCN-NEXT: s_waitcnt lgkmcnt(0) 99; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 100; GCN-NEXT: s_endpgm 101; 102; GISEL-LABEL: test_indirect_call_sgpr_ptr: 103; GISEL: .amd_kernel_code_t 104; GISEL-NEXT: amd_code_version_major = 1 105; GISEL-NEXT: amd_code_version_minor = 2 106; GISEL-NEXT: amd_machine_kind = 1 107; GISEL-NEXT: amd_machine_version_major = 7 108; GISEL-NEXT: amd_machine_version_minor = 0 109; GISEL-NEXT: amd_machine_version_stepping = 0 110; GISEL-NEXT: kernel_code_entry_byte_offset = 256 111; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 112; GISEL-NEXT: granulated_workitem_vgpr_count = 10 113; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 114; GISEL-NEXT: priority = 0 115; GISEL-NEXT: float_mode = 240 116; GISEL-NEXT: priv = 0 117; GISEL-NEXT: enable_dx10_clamp = 1 118; GISEL-NEXT: debug_mode = 0 119; GISEL-NEXT: enable_ieee_mode = 1 120; GISEL-NEXT: enable_wgp_mode = 0 121; GISEL-NEXT: enable_mem_ordered = 0 122; GISEL-NEXT: enable_fwd_progress = 0 123; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 124; GISEL-NEXT: user_sgpr_count = 14 125; GISEL-NEXT: enable_trap_handler = 0 126; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 127; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 128; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 129; GISEL-NEXT: enable_sgpr_workgroup_info = 0 130; GISEL-NEXT: enable_vgpr_workitem_id = 2 131; GISEL-NEXT: enable_exception_msb = 0 132; GISEL-NEXT: granulated_lds_size = 0 133; GISEL-NEXT: enable_exception = 0 134; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 135; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 136; GISEL-NEXT: enable_sgpr_queue_ptr = 1 137; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 138; GISEL-NEXT: enable_sgpr_dispatch_id 
= 1 139; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 140; GISEL-NEXT: enable_sgpr_private_segment_size = 0 141; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 142; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 143; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 144; GISEL-NEXT: enable_wavefront_size32 = 0 145; GISEL-NEXT: enable_ordered_append_gds = 0 146; GISEL-NEXT: private_element_size = 1 147; GISEL-NEXT: is_ptr64 = 1 148; GISEL-NEXT: is_dynamic_callstack = 1 149; GISEL-NEXT: is_debug_enabled = 0 150; GISEL-NEXT: is_xnack_enabled = 0 151; GISEL-NEXT: workitem_private_segment_byte_size = 16384 152; GISEL-NEXT: workgroup_group_segment_byte_size = 0 153; GISEL-NEXT: gds_segment_byte_size = 0 154; GISEL-NEXT: kernarg_segment_byte_size = 64 155; GISEL-NEXT: workgroup_fbarrier_count = 0 156; GISEL-NEXT: wavefront_sgpr_count = 68 157; GISEL-NEXT: workitem_vgpr_count = 42 158; GISEL-NEXT: reserved_vgpr_first = 0 159; GISEL-NEXT: reserved_vgpr_count = 0 160; GISEL-NEXT: reserved_sgpr_first = 0 161; GISEL-NEXT: reserved_sgpr_count = 0 162; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 163; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 164; GISEL-NEXT: kernarg_segment_alignment = 4 165; GISEL-NEXT: group_segment_alignment = 4 166; GISEL-NEXT: private_segment_alignment = 4 167; GISEL-NEXT: wavefront_size = 6 168; GISEL-NEXT: call_convention = -1 169; GISEL-NEXT: runtime_loader_kernel_symbol = 0 170; GISEL-NEXT: .end_amd_kernel_code_t 171; GISEL-NEXT: ; %bb.0: 172; GISEL-NEXT: s_mov_b32 s32, 0 173; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 174; GISEL-NEXT: s_add_i32 s12, s12, s17 175; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 176; GISEL-NEXT: s_add_u32 s0, s0, s17 177; GISEL-NEXT: s_addc_u32 s1, s1, 0 178; GISEL-NEXT: s_mov_b32 s13, s15 179; GISEL-NEXT: s_mov_b32 s12, s14 180; GISEL-NEXT: s_getpc_b64 s[14:15] 181; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 182; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 183; 
GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 184; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 185; GISEL-NEXT: s_add_u32 s8, s8, 8 186; GISEL-NEXT: s_addc_u32 s9, s9, 0 187; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 188; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 189; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 190; GISEL-NEXT: s_mov_b32 s14, s16 191; GISEL-NEXT: s_waitcnt lgkmcnt(0) 192; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 193; GISEL-NEXT: s_endpgm 194 %fptr = load void()*, void()* addrspace(4)* @gv.fptr0 195 call void %fptr() 196 ret void 197} 198 199define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { 200; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: 201; GCN: .amd_kernel_code_t 202; GCN-NEXT: amd_code_version_major = 1 203; GCN-NEXT: amd_code_version_minor = 2 204; GCN-NEXT: amd_machine_kind = 1 205; GCN-NEXT: amd_machine_version_major = 7 206; GCN-NEXT: amd_machine_version_minor = 0 207; GCN-NEXT: amd_machine_version_stepping = 0 208; GCN-NEXT: kernel_code_entry_byte_offset = 256 209; GCN-NEXT: kernel_code_prefetch_byte_size = 0 210; GCN-NEXT: granulated_workitem_vgpr_count = 10 211; GCN-NEXT: granulated_wavefront_sgpr_count = 8 212; GCN-NEXT: priority = 0 213; GCN-NEXT: float_mode = 240 214; GCN-NEXT: priv = 0 215; GCN-NEXT: enable_dx10_clamp = 1 216; GCN-NEXT: debug_mode = 0 217; GCN-NEXT: enable_ieee_mode = 1 218; GCN-NEXT: enable_wgp_mode = 0 219; GCN-NEXT: enable_mem_ordered = 0 220; GCN-NEXT: enable_fwd_progress = 0 221; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 222; GCN-NEXT: user_sgpr_count = 14 223; GCN-NEXT: enable_trap_handler = 0 224; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 225; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 226; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 227; GCN-NEXT: enable_sgpr_workgroup_info = 0 228; GCN-NEXT: enable_vgpr_workitem_id = 2 229; GCN-NEXT: enable_exception_msb = 0 230; GCN-NEXT: granulated_lds_size = 0 231; GCN-NEXT: enable_exception = 0 232; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 233; 
GCN-NEXT: enable_sgpr_dispatch_ptr = 1 234; GCN-NEXT: enable_sgpr_queue_ptr = 1 235; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 236; GCN-NEXT: enable_sgpr_dispatch_id = 1 237; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 238; GCN-NEXT: enable_sgpr_private_segment_size = 0 239; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 240; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 241; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 242; GCN-NEXT: enable_wavefront_size32 = 0 243; GCN-NEXT: enable_ordered_append_gds = 0 244; GCN-NEXT: private_element_size = 1 245; GCN-NEXT: is_ptr64 = 1 246; GCN-NEXT: is_dynamic_callstack = 1 247; GCN-NEXT: is_debug_enabled = 0 248; GCN-NEXT: is_xnack_enabled = 0 249; GCN-NEXT: workitem_private_segment_byte_size = 16384 250; GCN-NEXT: workgroup_group_segment_byte_size = 0 251; GCN-NEXT: gds_segment_byte_size = 0 252; GCN-NEXT: kernarg_segment_byte_size = 64 253; GCN-NEXT: workgroup_fbarrier_count = 0 254; GCN-NEXT: wavefront_sgpr_count = 68 255; GCN-NEXT: workitem_vgpr_count = 42 256; GCN-NEXT: reserved_vgpr_first = 0 257; GCN-NEXT: reserved_vgpr_count = 0 258; GCN-NEXT: reserved_sgpr_first = 0 259; GCN-NEXT: reserved_sgpr_count = 0 260; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 261; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 262; GCN-NEXT: kernarg_segment_alignment = 4 263; GCN-NEXT: group_segment_alignment = 4 264; GCN-NEXT: private_segment_alignment = 4 265; GCN-NEXT: wavefront_size = 6 266; GCN-NEXT: call_convention = -1 267; GCN-NEXT: runtime_loader_kernel_symbol = 0 268; GCN-NEXT: .end_amd_kernel_code_t 269; GCN-NEXT: ; %bb.0: 270; GCN-NEXT: s_mov_b32 s32, 0 271; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 272; GCN-NEXT: s_add_i32 s12, s12, s17 273; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 274; GCN-NEXT: s_add_u32 s0, s0, s17 275; GCN-NEXT: s_addc_u32 s1, s1, 0 276; GCN-NEXT: s_mov_b32 s13, s15 277; GCN-NEXT: s_mov_b32 s12, s14 278; GCN-NEXT: s_getpc_b64 s[14:15] 279; GCN-NEXT: s_add_u32 s14, s14, 
gv.fptr1@rel32@lo+4 280; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 281; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 282; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 283; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 284; GCN-NEXT: s_add_u32 s8, s8, 8 285; GCN-NEXT: s_addc_u32 s9, s9, 0 286; GCN-NEXT: v_or_b32_e32 v0, v0, v1 287; GCN-NEXT: v_or_b32_e32 v31, v0, v2 288; GCN-NEXT: v_mov_b32_e32 v0, 0x7b 289; GCN-NEXT: s_mov_b32 s14, s16 290; GCN-NEXT: s_waitcnt lgkmcnt(0) 291; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 292; GCN-NEXT: s_endpgm 293; 294; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: 295; GISEL: .amd_kernel_code_t 296; GISEL-NEXT: amd_code_version_major = 1 297; GISEL-NEXT: amd_code_version_minor = 2 298; GISEL-NEXT: amd_machine_kind = 1 299; GISEL-NEXT: amd_machine_version_major = 7 300; GISEL-NEXT: amd_machine_version_minor = 0 301; GISEL-NEXT: amd_machine_version_stepping = 0 302; GISEL-NEXT: kernel_code_entry_byte_offset = 256 303; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 304; GISEL-NEXT: granulated_workitem_vgpr_count = 10 305; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 306; GISEL-NEXT: priority = 0 307; GISEL-NEXT: float_mode = 240 308; GISEL-NEXT: priv = 0 309; GISEL-NEXT: enable_dx10_clamp = 1 310; GISEL-NEXT: debug_mode = 0 311; GISEL-NEXT: enable_ieee_mode = 1 312; GISEL-NEXT: enable_wgp_mode = 0 313; GISEL-NEXT: enable_mem_ordered = 0 314; GISEL-NEXT: enable_fwd_progress = 0 315; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 316; GISEL-NEXT: user_sgpr_count = 14 317; GISEL-NEXT: enable_trap_handler = 0 318; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 319; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 320; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 321; GISEL-NEXT: enable_sgpr_workgroup_info = 0 322; GISEL-NEXT: enable_vgpr_workitem_id = 2 323; GISEL-NEXT: enable_exception_msb = 0 324; GISEL-NEXT: granulated_lds_size = 0 325; GISEL-NEXT: enable_exception = 0 326; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 327; 
GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 328; GISEL-NEXT: enable_sgpr_queue_ptr = 1 329; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 330; GISEL-NEXT: enable_sgpr_dispatch_id = 1 331; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 332; GISEL-NEXT: enable_sgpr_private_segment_size = 0 333; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 334; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 335; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 336; GISEL-NEXT: enable_wavefront_size32 = 0 337; GISEL-NEXT: enable_ordered_append_gds = 0 338; GISEL-NEXT: private_element_size = 1 339; GISEL-NEXT: is_ptr64 = 1 340; GISEL-NEXT: is_dynamic_callstack = 1 341; GISEL-NEXT: is_debug_enabled = 0 342; GISEL-NEXT: is_xnack_enabled = 0 343; GISEL-NEXT: workitem_private_segment_byte_size = 16384 344; GISEL-NEXT: workgroup_group_segment_byte_size = 0 345; GISEL-NEXT: gds_segment_byte_size = 0 346; GISEL-NEXT: kernarg_segment_byte_size = 64 347; GISEL-NEXT: workgroup_fbarrier_count = 0 348; GISEL-NEXT: wavefront_sgpr_count = 68 349; GISEL-NEXT: workitem_vgpr_count = 42 350; GISEL-NEXT: reserved_vgpr_first = 0 351; GISEL-NEXT: reserved_vgpr_count = 0 352; GISEL-NEXT: reserved_sgpr_first = 0 353; GISEL-NEXT: reserved_sgpr_count = 0 354; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 355; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 356; GISEL-NEXT: kernarg_segment_alignment = 4 357; GISEL-NEXT: group_segment_alignment = 4 358; GISEL-NEXT: private_segment_alignment = 4 359; GISEL-NEXT: wavefront_size = 6 360; GISEL-NEXT: call_convention = -1 361; GISEL-NEXT: runtime_loader_kernel_symbol = 0 362; GISEL-NEXT: .end_amd_kernel_code_t 363; GISEL-NEXT: ; %bb.0: 364; GISEL-NEXT: s_mov_b32 s32, 0 365; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 366; GISEL-NEXT: s_add_i32 s12, s12, s17 367; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 368; GISEL-NEXT: s_add_u32 s0, s0, s17 369; GISEL-NEXT: s_addc_u32 s1, s1, 0 370; GISEL-NEXT: s_mov_b32 s13, s15 371; GISEL-NEXT: 
s_mov_b32 s12, s14 372; GISEL-NEXT: s_getpc_b64 s[14:15] 373; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 374; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 375; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 376; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 377; GISEL-NEXT: s_add_u32 s8, s8, 8 378; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 379; GISEL-NEXT: s_addc_u32 s9, s9, 0 380; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 381; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 382; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 383; GISEL-NEXT: s_mov_b32 s14, s16 384; GISEL-NEXT: s_waitcnt lgkmcnt(0) 385; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 386; GISEL-NEXT: s_endpgm 387 %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1 388 call void %fptr(i32 123) 389 ret void 390} 391 392define void @test_indirect_call_vgpr_ptr(void()* %fptr) { 393; GCN-LABEL: test_indirect_call_vgpr_ptr: 394; GCN: ; %bb.0: 395; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 396; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 397; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 398; GCN-NEXT: s_mov_b64 exec, s[16:17] 399; GCN-NEXT: v_writelane_b32 v40, s33, 18 400; GCN-NEXT: s_mov_b32 s33, s32 401; GCN-NEXT: s_addk_i32 s32, 0x400 402; GCN-NEXT: v_writelane_b32 v40, s30, 0 403; GCN-NEXT: v_writelane_b32 v40, s31, 1 404; GCN-NEXT: v_writelane_b32 v40, s34, 2 405; GCN-NEXT: v_writelane_b32 v40, s35, 3 406; GCN-NEXT: v_writelane_b32 v40, s36, 4 407; GCN-NEXT: v_writelane_b32 v40, s37, 5 408; GCN-NEXT: v_writelane_b32 v40, s38, 6 409; GCN-NEXT: v_writelane_b32 v40, s39, 7 410; GCN-NEXT: v_writelane_b32 v40, s40, 8 411; GCN-NEXT: v_writelane_b32 v40, s41, 9 412; GCN-NEXT: v_writelane_b32 v40, s42, 10 413; GCN-NEXT: v_writelane_b32 v40, s43, 11 414; GCN-NEXT: v_writelane_b32 v40, s44, 12 415; GCN-NEXT: v_writelane_b32 v40, s45, 13 416; GCN-NEXT: v_writelane_b32 v40, s46, 14 417; GCN-NEXT: v_writelane_b32 v40, s47, 15 418; GCN-NEXT: v_writelane_b32 v40, s48, 16 419; GCN-NEXT: 
v_writelane_b32 v40, s49, 17 420; GCN-NEXT: s_mov_b32 s42, s15 421; GCN-NEXT: s_mov_b32 s43, s14 422; GCN-NEXT: s_mov_b32 s44, s13 423; GCN-NEXT: s_mov_b32 s45, s12 424; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 425; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 426; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 427; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 428; GCN-NEXT: s_mov_b64 s[46:47], exec 429; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 430; GCN-NEXT: v_readfirstlane_b32 s16, v0 431; GCN-NEXT: v_readfirstlane_b32 s17, v1 432; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 433; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 434; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 435; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 436; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 437; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 438; GCN-NEXT: s_mov_b32 s12, s45 439; GCN-NEXT: s_mov_b32 s13, s44 440; GCN-NEXT: s_mov_b32 s14, s43 441; GCN-NEXT: s_mov_b32 s15, s42 442; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 443; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 444; GCN-NEXT: ; implicit-def: $vgpr31 445; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 446; GCN-NEXT: s_cbranch_execnz .LBB2_1 447; GCN-NEXT: ; %bb.2: 448; GCN-NEXT: s_mov_b64 exec, s[46:47] 449; GCN-NEXT: v_readlane_b32 s49, v40, 17 450; GCN-NEXT: v_readlane_b32 s48, v40, 16 451; GCN-NEXT: v_readlane_b32 s47, v40, 15 452; GCN-NEXT: v_readlane_b32 s46, v40, 14 453; GCN-NEXT: v_readlane_b32 s45, v40, 13 454; GCN-NEXT: v_readlane_b32 s44, v40, 12 455; GCN-NEXT: v_readlane_b32 s43, v40, 11 456; GCN-NEXT: v_readlane_b32 s42, v40, 10 457; GCN-NEXT: v_readlane_b32 s41, v40, 9 458; GCN-NEXT: v_readlane_b32 s40, v40, 8 459; GCN-NEXT: v_readlane_b32 s39, v40, 7 460; GCN-NEXT: v_readlane_b32 s38, v40, 6 461; GCN-NEXT: v_readlane_b32 s37, v40, 5 462; GCN-NEXT: v_readlane_b32 s36, v40, 4 463; GCN-NEXT: v_readlane_b32 s35, v40, 3 464; GCN-NEXT: v_readlane_b32 s34, v40, 2 465; GCN-NEXT: v_readlane_b32 s31, v40, 1 466; GCN-NEXT: v_readlane_b32 s30, v40, 0 467; GCN-NEXT: s_addk_i32 
s32, 0xfc00 468; GCN-NEXT: v_readlane_b32 s33, v40, 18 469; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 470; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 471; GCN-NEXT: s_mov_b64 exec, s[4:5] 472; GCN-NEXT: s_waitcnt vmcnt(0) 473; GCN-NEXT: s_setpc_b64 s[30:31] 474; 475; GISEL-LABEL: test_indirect_call_vgpr_ptr: 476; GISEL: ; %bb.0: 477; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 478; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 479; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 480; GISEL-NEXT: s_mov_b64 exec, s[16:17] 481; GISEL-NEXT: v_writelane_b32 v40, s33, 18 482; GISEL-NEXT: s_mov_b32 s33, s32 483; GISEL-NEXT: s_addk_i32 s32, 0x400 484; GISEL-NEXT: v_writelane_b32 v40, s30, 0 485; GISEL-NEXT: v_writelane_b32 v40, s31, 1 486; GISEL-NEXT: v_writelane_b32 v40, s34, 2 487; GISEL-NEXT: v_writelane_b32 v40, s35, 3 488; GISEL-NEXT: v_writelane_b32 v40, s36, 4 489; GISEL-NEXT: v_writelane_b32 v40, s37, 5 490; GISEL-NEXT: v_writelane_b32 v40, s38, 6 491; GISEL-NEXT: v_writelane_b32 v40, s39, 7 492; GISEL-NEXT: v_writelane_b32 v40, s40, 8 493; GISEL-NEXT: v_writelane_b32 v40, s41, 9 494; GISEL-NEXT: v_writelane_b32 v40, s42, 10 495; GISEL-NEXT: v_writelane_b32 v40, s43, 11 496; GISEL-NEXT: v_writelane_b32 v40, s44, 12 497; GISEL-NEXT: v_writelane_b32 v40, s45, 13 498; GISEL-NEXT: v_writelane_b32 v40, s46, 14 499; GISEL-NEXT: v_writelane_b32 v40, s47, 15 500; GISEL-NEXT: v_writelane_b32 v40, s48, 16 501; GISEL-NEXT: v_writelane_b32 v40, s49, 17 502; GISEL-NEXT: s_mov_b32 s42, s15 503; GISEL-NEXT: s_mov_b32 s43, s14 504; GISEL-NEXT: s_mov_b32 s44, s13 505; GISEL-NEXT: s_mov_b32 s45, s12 506; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 507; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 508; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 509; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 510; GISEL-NEXT: s_mov_b64 s[46:47], exec 511; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 512; GISEL-NEXT: v_readfirstlane_b32 s16, v0 513; 
GISEL-NEXT: v_readfirstlane_b32 s17, v1 514; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 515; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 516; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 517; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 518; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 519; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 520; GISEL-NEXT: s_mov_b32 s12, s45 521; GISEL-NEXT: s_mov_b32 s13, s44 522; GISEL-NEXT: s_mov_b32 s14, s43 523; GISEL-NEXT: s_mov_b32 s15, s42 524; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 525; GISEL-NEXT: ; implicit-def: $vgpr0 526; GISEL-NEXT: ; implicit-def: $vgpr31 527; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 528; GISEL-NEXT: s_cbranch_execnz .LBB2_1 529; GISEL-NEXT: ; %bb.2: 530; GISEL-NEXT: s_mov_b64 exec, s[46:47] 531; GISEL-NEXT: v_readlane_b32 s49, v40, 17 532; GISEL-NEXT: v_readlane_b32 s48, v40, 16 533; GISEL-NEXT: v_readlane_b32 s47, v40, 15 534; GISEL-NEXT: v_readlane_b32 s46, v40, 14 535; GISEL-NEXT: v_readlane_b32 s45, v40, 13 536; GISEL-NEXT: v_readlane_b32 s44, v40, 12 537; GISEL-NEXT: v_readlane_b32 s43, v40, 11 538; GISEL-NEXT: v_readlane_b32 s42, v40, 10 539; GISEL-NEXT: v_readlane_b32 s41, v40, 9 540; GISEL-NEXT: v_readlane_b32 s40, v40, 8 541; GISEL-NEXT: v_readlane_b32 s39, v40, 7 542; GISEL-NEXT: v_readlane_b32 s38, v40, 6 543; GISEL-NEXT: v_readlane_b32 s37, v40, 5 544; GISEL-NEXT: v_readlane_b32 s36, v40, 4 545; GISEL-NEXT: v_readlane_b32 s35, v40, 3 546; GISEL-NEXT: v_readlane_b32 s34, v40, 2 547; GISEL-NEXT: v_readlane_b32 s31, v40, 1 548; GISEL-NEXT: v_readlane_b32 s30, v40, 0 549; GISEL-NEXT: s_addk_i32 s32, 0xfc00 550; GISEL-NEXT: v_readlane_b32 s33, v40, 18 551; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 552; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 553; GISEL-NEXT: s_mov_b64 exec, s[4:5] 554; GISEL-NEXT: s_waitcnt vmcnt(0) 555; GISEL-NEXT: s_setpc_b64 s[30:31] 556 call void %fptr() 557 ret void 558} 559 560define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { 561; 
GCN-LABEL: test_indirect_call_vgpr_ptr_arg: 562; GCN: ; %bb.0: 563; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 564; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 565; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 566; GCN-NEXT: s_mov_b64 exec, s[16:17] 567; GCN-NEXT: v_writelane_b32 v40, s33, 18 568; GCN-NEXT: s_mov_b32 s33, s32 569; GCN-NEXT: s_addk_i32 s32, 0x400 570; GCN-NEXT: v_writelane_b32 v40, s30, 0 571; GCN-NEXT: v_writelane_b32 v40, s31, 1 572; GCN-NEXT: v_writelane_b32 v40, s34, 2 573; GCN-NEXT: v_writelane_b32 v40, s35, 3 574; GCN-NEXT: v_writelane_b32 v40, s36, 4 575; GCN-NEXT: v_writelane_b32 v40, s37, 5 576; GCN-NEXT: v_writelane_b32 v40, s38, 6 577; GCN-NEXT: v_writelane_b32 v40, s39, 7 578; GCN-NEXT: v_writelane_b32 v40, s40, 8 579; GCN-NEXT: v_writelane_b32 v40, s41, 9 580; GCN-NEXT: v_writelane_b32 v40, s42, 10 581; GCN-NEXT: v_writelane_b32 v40, s43, 11 582; GCN-NEXT: v_writelane_b32 v40, s44, 12 583; GCN-NEXT: v_writelane_b32 v40, s45, 13 584; GCN-NEXT: v_writelane_b32 v40, s46, 14 585; GCN-NEXT: v_writelane_b32 v40, s47, 15 586; GCN-NEXT: v_writelane_b32 v40, s48, 16 587; GCN-NEXT: v_writelane_b32 v40, s49, 17 588; GCN-NEXT: s_mov_b32 s42, s15 589; GCN-NEXT: s_mov_b32 s43, s14 590; GCN-NEXT: s_mov_b32 s44, s13 591; GCN-NEXT: s_mov_b32 s45, s12 592; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 593; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 594; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 595; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 596; GCN-NEXT: s_mov_b64 s[46:47], exec 597; GCN-NEXT: v_mov_b32_e32 v2, 0x7b 598; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 599; GCN-NEXT: v_readfirstlane_b32 s16, v0 600; GCN-NEXT: v_readfirstlane_b32 s17, v1 601; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 602; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 603; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 604; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 605; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 606; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 607; GCN-NEXT: 
s_mov_b32 s12, s45 608; GCN-NEXT: s_mov_b32 s13, s44 609; GCN-NEXT: s_mov_b32 s14, s43 610; GCN-NEXT: s_mov_b32 s15, s42 611; GCN-NEXT: v_mov_b32_e32 v0, v2 612; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 613; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 614; GCN-NEXT: ; implicit-def: $vgpr31 615; GCN-NEXT: ; implicit-def: $vgpr2 616; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 617; GCN-NEXT: s_cbranch_execnz .LBB3_1 618; GCN-NEXT: ; %bb.2: 619; GCN-NEXT: s_mov_b64 exec, s[46:47] 620; GCN-NEXT: v_readlane_b32 s49, v40, 17 621; GCN-NEXT: v_readlane_b32 s48, v40, 16 622; GCN-NEXT: v_readlane_b32 s47, v40, 15 623; GCN-NEXT: v_readlane_b32 s46, v40, 14 624; GCN-NEXT: v_readlane_b32 s45, v40, 13 625; GCN-NEXT: v_readlane_b32 s44, v40, 12 626; GCN-NEXT: v_readlane_b32 s43, v40, 11 627; GCN-NEXT: v_readlane_b32 s42, v40, 10 628; GCN-NEXT: v_readlane_b32 s41, v40, 9 629; GCN-NEXT: v_readlane_b32 s40, v40, 8 630; GCN-NEXT: v_readlane_b32 s39, v40, 7 631; GCN-NEXT: v_readlane_b32 s38, v40, 6 632; GCN-NEXT: v_readlane_b32 s37, v40, 5 633; GCN-NEXT: v_readlane_b32 s36, v40, 4 634; GCN-NEXT: v_readlane_b32 s35, v40, 3 635; GCN-NEXT: v_readlane_b32 s34, v40, 2 636; GCN-NEXT: v_readlane_b32 s31, v40, 1 637; GCN-NEXT: v_readlane_b32 s30, v40, 0 638; GCN-NEXT: s_addk_i32 s32, 0xfc00 639; GCN-NEXT: v_readlane_b32 s33, v40, 18 640; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 641; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 642; GCN-NEXT: s_mov_b64 exec, s[4:5] 643; GCN-NEXT: s_waitcnt vmcnt(0) 644; GCN-NEXT: s_setpc_b64 s[30:31] 645; 646; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: 647; GISEL: ; %bb.0: 648; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 649; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 650; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 651; GISEL-NEXT: s_mov_b64 exec, s[16:17] 652; GISEL-NEXT: v_writelane_b32 v40, s33, 18 653; GISEL-NEXT: s_mov_b32 s33, s32 654; GISEL-NEXT: s_addk_i32 s32, 0x400 655; GISEL-NEXT: 
v_writelane_b32 v40, s30, 0 656; GISEL-NEXT: v_writelane_b32 v40, s31, 1 657; GISEL-NEXT: v_writelane_b32 v40, s34, 2 658; GISEL-NEXT: v_writelane_b32 v40, s35, 3 659; GISEL-NEXT: v_writelane_b32 v40, s36, 4 660; GISEL-NEXT: v_writelane_b32 v40, s37, 5 661; GISEL-NEXT: v_writelane_b32 v40, s38, 6 662; GISEL-NEXT: v_writelane_b32 v40, s39, 7 663; GISEL-NEXT: v_writelane_b32 v40, s40, 8 664; GISEL-NEXT: v_writelane_b32 v40, s41, 9 665; GISEL-NEXT: v_writelane_b32 v40, s42, 10 666; GISEL-NEXT: v_writelane_b32 v40, s43, 11 667; GISEL-NEXT: v_writelane_b32 v40, s44, 12 668; GISEL-NEXT: v_writelane_b32 v40, s45, 13 669; GISEL-NEXT: v_writelane_b32 v40, s46, 14 670; GISEL-NEXT: v_writelane_b32 v40, s47, 15 671; GISEL-NEXT: v_writelane_b32 v40, s48, 16 672; GISEL-NEXT: v_writelane_b32 v40, s49, 17 673; GISEL-NEXT: s_mov_b32 s42, s15 674; GISEL-NEXT: s_mov_b32 s43, s14 675; GISEL-NEXT: s_mov_b32 s44, s13 676; GISEL-NEXT: s_mov_b32 s45, s12 677; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 678; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 679; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 680; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 681; GISEL-NEXT: s_mov_b64 s[46:47], exec 682; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 683; GISEL-NEXT: v_readfirstlane_b32 s16, v0 684; GISEL-NEXT: v_readfirstlane_b32 s17, v1 685; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 686; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 687; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 688; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 689; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 690; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 691; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 692; GISEL-NEXT: s_mov_b32 s12, s45 693; GISEL-NEXT: s_mov_b32 s13, s44 694; GISEL-NEXT: s_mov_b32 s14, s43 695; GISEL-NEXT: s_mov_b32 s15, s42 696; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 697; GISEL-NEXT: ; implicit-def: $vgpr0 698; GISEL-NEXT: ; implicit-def: $vgpr31 699; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 700; GISEL-NEXT: s_cbranch_execnz 
.LBB3_1 701; GISEL-NEXT: ; %bb.2: 702; GISEL-NEXT: s_mov_b64 exec, s[46:47] 703; GISEL-NEXT: v_readlane_b32 s49, v40, 17 704; GISEL-NEXT: v_readlane_b32 s48, v40, 16 705; GISEL-NEXT: v_readlane_b32 s47, v40, 15 706; GISEL-NEXT: v_readlane_b32 s46, v40, 14 707; GISEL-NEXT: v_readlane_b32 s45, v40, 13 708; GISEL-NEXT: v_readlane_b32 s44, v40, 12 709; GISEL-NEXT: v_readlane_b32 s43, v40, 11 710; GISEL-NEXT: v_readlane_b32 s42, v40, 10 711; GISEL-NEXT: v_readlane_b32 s41, v40, 9 712; GISEL-NEXT: v_readlane_b32 s40, v40, 8 713; GISEL-NEXT: v_readlane_b32 s39, v40, 7 714; GISEL-NEXT: v_readlane_b32 s38, v40, 6 715; GISEL-NEXT: v_readlane_b32 s37, v40, 5 716; GISEL-NEXT: v_readlane_b32 s36, v40, 4 717; GISEL-NEXT: v_readlane_b32 s35, v40, 3 718; GISEL-NEXT: v_readlane_b32 s34, v40, 2 719; GISEL-NEXT: v_readlane_b32 s31, v40, 1 720; GISEL-NEXT: v_readlane_b32 s30, v40, 0 721; GISEL-NEXT: s_addk_i32 s32, 0xfc00 722; GISEL-NEXT: v_readlane_b32 s33, v40, 18 723; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 724; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 725; GISEL-NEXT: s_mov_b64 exec, s[4:5] 726; GISEL-NEXT: s_waitcnt vmcnt(0) 727; GISEL-NEXT: s_setpc_b64 s[30:31] 728 call void %fptr(i32 123) 729 ret void 730} 731 732define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { 733; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: 734; GCN: ; %bb.0: 735; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 736; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 737; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 738; GCN-NEXT: s_mov_b64 exec, s[16:17] 739; GCN-NEXT: v_writelane_b32 v40, s33, 18 740; GCN-NEXT: s_mov_b32 s33, s32 741; GCN-NEXT: s_addk_i32 s32, 0x400 742; GCN-NEXT: v_writelane_b32 v40, s30, 0 743; GCN-NEXT: v_writelane_b32 v40, s31, 1 744; GCN-NEXT: v_writelane_b32 v40, s34, 2 745; GCN-NEXT: v_writelane_b32 v40, s35, 3 746; GCN-NEXT: v_writelane_b32 v40, s36, 4 747; GCN-NEXT: v_writelane_b32 v40, s37, 5 748; GCN-NEXT: 
v_writelane_b32 v40, s38, 6 749; GCN-NEXT: v_writelane_b32 v40, s39, 7 750; GCN-NEXT: v_writelane_b32 v40, s40, 8 751; GCN-NEXT: v_writelane_b32 v40, s41, 9 752; GCN-NEXT: v_writelane_b32 v40, s42, 10 753; GCN-NEXT: v_writelane_b32 v40, s43, 11 754; GCN-NEXT: v_writelane_b32 v40, s44, 12 755; GCN-NEXT: v_writelane_b32 v40, s45, 13 756; GCN-NEXT: v_writelane_b32 v40, s46, 14 757; GCN-NEXT: v_writelane_b32 v40, s47, 15 758; GCN-NEXT: v_writelane_b32 v40, s48, 16 759; GCN-NEXT: v_writelane_b32 v40, s49, 17 760; GCN-NEXT: s_mov_b32 s42, s15 761; GCN-NEXT: s_mov_b32 s43, s14 762; GCN-NEXT: s_mov_b32 s44, s13 763; GCN-NEXT: s_mov_b32 s45, s12 764; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 765; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 766; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 767; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 768; GCN-NEXT: s_mov_b64 s[46:47], exec 769; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 770; GCN-NEXT: v_readfirstlane_b32 s16, v0 771; GCN-NEXT: v_readfirstlane_b32 s17, v1 772; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 773; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 774; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 775; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 776; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 777; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 778; GCN-NEXT: s_mov_b32 s12, s45 779; GCN-NEXT: s_mov_b32 s13, s44 780; GCN-NEXT: s_mov_b32 s14, s43 781; GCN-NEXT: s_mov_b32 s15, s42 782; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 783; GCN-NEXT: v_mov_b32_e32 v2, v0 784; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 785; GCN-NEXT: ; implicit-def: $vgpr31 786; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 787; GCN-NEXT: s_cbranch_execnz .LBB4_1 788; GCN-NEXT: ; %bb.2: 789; GCN-NEXT: s_mov_b64 exec, s[46:47] 790; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 791; GCN-NEXT: v_readlane_b32 s49, v40, 17 792; GCN-NEXT: v_readlane_b32 s48, v40, 16 793; GCN-NEXT: v_readlane_b32 s47, v40, 15 794; GCN-NEXT: v_readlane_b32 s46, v40, 14 795; GCN-NEXT: v_readlane_b32 s45, v40, 13 796; GCN-NEXT: 
v_readlane_b32 s44, v40, 12 797; GCN-NEXT: v_readlane_b32 s43, v40, 11 798; GCN-NEXT: v_readlane_b32 s42, v40, 10 799; GCN-NEXT: v_readlane_b32 s41, v40, 9 800; GCN-NEXT: v_readlane_b32 s40, v40, 8 801; GCN-NEXT: v_readlane_b32 s39, v40, 7 802; GCN-NEXT: v_readlane_b32 s38, v40, 6 803; GCN-NEXT: v_readlane_b32 s37, v40, 5 804; GCN-NEXT: v_readlane_b32 s36, v40, 4 805; GCN-NEXT: v_readlane_b32 s35, v40, 3 806; GCN-NEXT: v_readlane_b32 s34, v40, 2 807; GCN-NEXT: v_readlane_b32 s31, v40, 1 808; GCN-NEXT: v_readlane_b32 s30, v40, 0 809; GCN-NEXT: s_addk_i32 s32, 0xfc00 810; GCN-NEXT: v_readlane_b32 s33, v40, 18 811; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 812; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 813; GCN-NEXT: s_mov_b64 exec, s[4:5] 814; GCN-NEXT: s_waitcnt vmcnt(0) 815; GCN-NEXT: s_setpc_b64 s[30:31] 816; 817; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: 818; GISEL: ; %bb.0: 819; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 820; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 821; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 822; GISEL-NEXT: s_mov_b64 exec, s[16:17] 823; GISEL-NEXT: v_writelane_b32 v40, s33, 18 824; GISEL-NEXT: s_mov_b32 s33, s32 825; GISEL-NEXT: s_addk_i32 s32, 0x400 826; GISEL-NEXT: v_writelane_b32 v40, s30, 0 827; GISEL-NEXT: v_writelane_b32 v40, s31, 1 828; GISEL-NEXT: v_writelane_b32 v40, s34, 2 829; GISEL-NEXT: v_writelane_b32 v40, s35, 3 830; GISEL-NEXT: v_writelane_b32 v40, s36, 4 831; GISEL-NEXT: v_writelane_b32 v40, s37, 5 832; GISEL-NEXT: v_writelane_b32 v40, s38, 6 833; GISEL-NEXT: v_writelane_b32 v40, s39, 7 834; GISEL-NEXT: v_writelane_b32 v40, s40, 8 835; GISEL-NEXT: v_writelane_b32 v40, s41, 9 836; GISEL-NEXT: v_writelane_b32 v40, s42, 10 837; GISEL-NEXT: v_writelane_b32 v40, s43, 11 838; GISEL-NEXT: v_writelane_b32 v40, s44, 12 839; GISEL-NEXT: v_writelane_b32 v40, s45, 13 840; GISEL-NEXT: v_writelane_b32 v40, s46, 14 841; GISEL-NEXT: v_writelane_b32 v40, s47, 15 
842; GISEL-NEXT: v_writelane_b32 v40, s48, 16 843; GISEL-NEXT: v_writelane_b32 v40, s49, 17 844; GISEL-NEXT: s_mov_b32 s42, s15 845; GISEL-NEXT: s_mov_b32 s43, s14 846; GISEL-NEXT: s_mov_b32 s44, s13 847; GISEL-NEXT: s_mov_b32 s45, s12 848; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 849; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 850; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 851; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 852; GISEL-NEXT: s_mov_b64 s[46:47], exec 853; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 854; GISEL-NEXT: v_readfirstlane_b32 s16, v0 855; GISEL-NEXT: v_readfirstlane_b32 s17, v1 856; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 857; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 858; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 859; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 860; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 861; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 862; GISEL-NEXT: s_mov_b32 s12, s45 863; GISEL-NEXT: s_mov_b32 s13, s44 864; GISEL-NEXT: s_mov_b32 s14, s43 865; GISEL-NEXT: s_mov_b32 s15, s42 866; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 867; GISEL-NEXT: v_mov_b32_e32 v1, v0 868; GISEL-NEXT: ; implicit-def: $vgpr0 869; GISEL-NEXT: ; implicit-def: $vgpr31 870; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 871; GISEL-NEXT: s_cbranch_execnz .LBB4_1 872; GISEL-NEXT: ; %bb.2: 873; GISEL-NEXT: s_mov_b64 exec, s[46:47] 874; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 875; GISEL-NEXT: v_readlane_b32 s49, v40, 17 876; GISEL-NEXT: v_readlane_b32 s48, v40, 16 877; GISEL-NEXT: v_readlane_b32 s47, v40, 15 878; GISEL-NEXT: v_readlane_b32 s46, v40, 14 879; GISEL-NEXT: v_readlane_b32 s45, v40, 13 880; GISEL-NEXT: v_readlane_b32 s44, v40, 12 881; GISEL-NEXT: v_readlane_b32 s43, v40, 11 882; GISEL-NEXT: v_readlane_b32 s42, v40, 10 883; GISEL-NEXT: v_readlane_b32 s41, v40, 9 884; GISEL-NEXT: v_readlane_b32 s40, v40, 8 885; GISEL-NEXT: v_readlane_b32 s39, v40, 7 886; GISEL-NEXT: v_readlane_b32 s38, v40, 6 887; GISEL-NEXT: v_readlane_b32 s37, v40, 5 888; 
GISEL-NEXT: v_readlane_b32 s36, v40, 4 889; GISEL-NEXT: v_readlane_b32 s35, v40, 3 890; GISEL-NEXT: v_readlane_b32 s34, v40, 2 891; GISEL-NEXT: v_readlane_b32 s31, v40, 1 892; GISEL-NEXT: v_readlane_b32 s30, v40, 0 893; GISEL-NEXT: s_addk_i32 s32, 0xfc00 894; GISEL-NEXT: v_readlane_b32 s33, v40, 18 895; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 896; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 897; GISEL-NEXT: s_mov_b64 exec, s[4:5] 898; GISEL-NEXT: s_waitcnt vmcnt(0) 899; GISEL-NEXT: s_setpc_b64 s[30:31] 900 %a = call i32 %fptr() 901 %b = add i32 %a, 1 902 ret i32 %b 903} 904
; Indirect call through a function pointer held in VGPRs (v0/v1), made only on the path taken
; when %cond is true. The expected output masks %cond to bit 0 of v2, guards the call region with
; s_and_saveexec_b64 / s_cbranch_execz, and nests the readfirstlane + s_swappc_b64 loop (.LBB5_2)
; inside that region; exec is restored at .LBB5_4 before the epilogue.
905define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { 906; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: 907; GCN: ; %bb.0: ; %bb0 908; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 909; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 910; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 911; GCN-NEXT: s_mov_b64 exec, s[16:17] 912; GCN-NEXT: v_writelane_b32 v40, s33, 20 913; GCN-NEXT: s_mov_b32 s33, s32 914; GCN-NEXT: s_addk_i32 s32, 0x400 915; GCN-NEXT: v_writelane_b32 v40, s30, 0 916; GCN-NEXT: v_writelane_b32 v40, s31, 1 917; GCN-NEXT: v_writelane_b32 v40, s34, 2 918; GCN-NEXT: v_writelane_b32 v40, s35, 3 919; GCN-NEXT: v_writelane_b32 v40, s36, 4 920; GCN-NEXT: v_writelane_b32 v40, s37, 5 921; GCN-NEXT: v_writelane_b32 v40, s38, 6 922; GCN-NEXT: v_writelane_b32 v40, s39, 7 923; GCN-NEXT: v_writelane_b32 v40, s40, 8 924; GCN-NEXT: v_writelane_b32 v40, s41, 9 925; GCN-NEXT: v_writelane_b32 v40, s42, 10 926; GCN-NEXT: v_writelane_b32 v40, s43, 11 927; GCN-NEXT: v_writelane_b32 v40, s44, 12 928; GCN-NEXT: v_writelane_b32 v40, s45, 13 929; GCN-NEXT: v_writelane_b32 v40, s46, 14 930; GCN-NEXT: v_writelane_b32 v40, s47, 15 931; GCN-NEXT: v_writelane_b32 v40, s48, 16 932; GCN-NEXT: v_writelane_b32 v40, s49, 17 933; GCN-NEXT: v_writelane_b32 v40, s50, 18 934; GCN-NEXT: v_writelane_b32 v40, s51, 19 935; GCN-NEXT: 
s_mov_b32 s42, s15 936; GCN-NEXT: s_mov_b32 s43, s14 937; GCN-NEXT: s_mov_b32 s44, s13 938; GCN-NEXT: s_mov_b32 s45, s12 939; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 940; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 941; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 942; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 943; GCN-NEXT: v_and_b32_e32 v2, 1, v2 944; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 945; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc 946; GCN-NEXT: s_cbranch_execz .LBB5_4 947; GCN-NEXT: ; %bb.1: ; %bb1 948; GCN-NEXT: s_mov_b64 s[48:49], exec 949; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 950; GCN-NEXT: v_readfirstlane_b32 s16, v0 951; GCN-NEXT: v_readfirstlane_b32 s17, v1 952; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 953; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc 954; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 955; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 956; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 957; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 958; GCN-NEXT: s_mov_b32 s12, s45 959; GCN-NEXT: s_mov_b32 s13, s44 960; GCN-NEXT: s_mov_b32 s14, s43 961; GCN-NEXT: s_mov_b32 s15, s42 962; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 963; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 964; GCN-NEXT: ; implicit-def: $vgpr31 965; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] 966; GCN-NEXT: s_cbranch_execnz .LBB5_2 967; GCN-NEXT: ; %bb.3: 968; GCN-NEXT: s_mov_b64 exec, s[48:49] 969; GCN-NEXT: .LBB5_4: ; %bb2 970; GCN-NEXT: s_or_b64 exec, exec, s[46:47] 971; GCN-NEXT: v_readlane_b32 s51, v40, 19 972; GCN-NEXT: v_readlane_b32 s50, v40, 18 973; GCN-NEXT: v_readlane_b32 s49, v40, 17 974; GCN-NEXT: v_readlane_b32 s48, v40, 16 975; GCN-NEXT: v_readlane_b32 s47, v40, 15 976; GCN-NEXT: v_readlane_b32 s46, v40, 14 977; GCN-NEXT: v_readlane_b32 s45, v40, 13 978; GCN-NEXT: v_readlane_b32 s44, v40, 12 979; GCN-NEXT: v_readlane_b32 s43, v40, 11 980; GCN-NEXT: v_readlane_b32 s42, v40, 10 981; GCN-NEXT: v_readlane_b32 s41, v40, 9 982; GCN-NEXT: v_readlane_b32 s40, v40, 8 983; GCN-NEXT: v_readlane_b32 s39, v40, 7 
984; GCN-NEXT: v_readlane_b32 s38, v40, 6 985; GCN-NEXT: v_readlane_b32 s37, v40, 5 986; GCN-NEXT: v_readlane_b32 s36, v40, 4 987; GCN-NEXT: v_readlane_b32 s35, v40, 3 988; GCN-NEXT: v_readlane_b32 s34, v40, 2 989; GCN-NEXT: v_readlane_b32 s31, v40, 1 990; GCN-NEXT: v_readlane_b32 s30, v40, 0 991; GCN-NEXT: s_addk_i32 s32, 0xfc00 992; GCN-NEXT: v_readlane_b32 s33, v40, 20 993; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 994; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 995; GCN-NEXT: s_mov_b64 exec, s[4:5] 996; GCN-NEXT: s_waitcnt vmcnt(0) 997; GCN-NEXT: s_setpc_b64 s[30:31] 998; 999; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: 1000; GISEL: ; %bb.0: ; %bb0 1001; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1002; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 1003; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1004; GISEL-NEXT: s_mov_b64 exec, s[16:17] 1005; GISEL-NEXT: v_writelane_b32 v40, s33, 20 1006; GISEL-NEXT: s_mov_b32 s33, s32 1007; GISEL-NEXT: s_addk_i32 s32, 0x400 1008; GISEL-NEXT: v_writelane_b32 v40, s30, 0 1009; GISEL-NEXT: v_writelane_b32 v40, s31, 1 1010; GISEL-NEXT: v_writelane_b32 v40, s34, 2 1011; GISEL-NEXT: v_writelane_b32 v40, s35, 3 1012; GISEL-NEXT: v_writelane_b32 v40, s36, 4 1013; GISEL-NEXT: v_writelane_b32 v40, s37, 5 1014; GISEL-NEXT: v_writelane_b32 v40, s38, 6 1015; GISEL-NEXT: v_writelane_b32 v40, s39, 7 1016; GISEL-NEXT: v_writelane_b32 v40, s40, 8 1017; GISEL-NEXT: v_writelane_b32 v40, s41, 9 1018; GISEL-NEXT: v_writelane_b32 v40, s42, 10 1019; GISEL-NEXT: v_writelane_b32 v40, s43, 11 1020; GISEL-NEXT: v_writelane_b32 v40, s44, 12 1021; GISEL-NEXT: v_writelane_b32 v40, s45, 13 1022; GISEL-NEXT: v_writelane_b32 v40, s46, 14 1023; GISEL-NEXT: v_writelane_b32 v40, s47, 15 1024; GISEL-NEXT: v_writelane_b32 v40, s48, 16 1025; GISEL-NEXT: v_writelane_b32 v40, s49, 17 1026; GISEL-NEXT: v_writelane_b32 v40, s50, 18 1027; GISEL-NEXT: v_writelane_b32 v40, s51, 19 1028; GISEL-NEXT: 
s_mov_b32 s42, s15 1029; GISEL-NEXT: s_mov_b32 s43, s14 1030; GISEL-NEXT: s_mov_b32 s44, s13 1031; GISEL-NEXT: s_mov_b32 s45, s12 1032; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 1033; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 1034; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 1035; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 1036; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 1037; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1038; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc 1039; GISEL-NEXT: s_cbranch_execz .LBB5_4 1040; GISEL-NEXT: ; %bb.1: ; %bb1 1041; GISEL-NEXT: s_mov_b64 s[48:49], exec 1042; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 1043; GISEL-NEXT: v_readfirstlane_b32 s16, v0 1044; GISEL-NEXT: v_readfirstlane_b32 s17, v1 1045; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 1046; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc 1047; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 1048; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 1049; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 1050; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 1051; GISEL-NEXT: s_mov_b32 s12, s45 1052; GISEL-NEXT: s_mov_b32 s13, s44 1053; GISEL-NEXT: s_mov_b32 s14, s43 1054; GISEL-NEXT: s_mov_b32 s15, s42 1055; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 1056; GISEL-NEXT: ; implicit-def: $vgpr0 1057; GISEL-NEXT: ; implicit-def: $vgpr31 1058; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] 1059; GISEL-NEXT: s_cbranch_execnz .LBB5_2 1060; GISEL-NEXT: ; %bb.3: 1061; GISEL-NEXT: s_mov_b64 exec, s[48:49] 1062; GISEL-NEXT: .LBB5_4: ; %bb2 1063; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] 1064; GISEL-NEXT: v_readlane_b32 s51, v40, 19 1065; GISEL-NEXT: v_readlane_b32 s50, v40, 18 1066; GISEL-NEXT: v_readlane_b32 s49, v40, 17 1067; GISEL-NEXT: v_readlane_b32 s48, v40, 16 1068; GISEL-NEXT: v_readlane_b32 s47, v40, 15 1069; GISEL-NEXT: v_readlane_b32 s46, v40, 14 1070; GISEL-NEXT: v_readlane_b32 s45, v40, 13 1071; GISEL-NEXT: v_readlane_b32 s44, v40, 12 1072; GISEL-NEXT: v_readlane_b32 s43, v40, 11 1073; GISEL-NEXT: v_readlane_b32 s42, v40, 10 
1074; GISEL-NEXT: v_readlane_b32 s41, v40, 9 1075; GISEL-NEXT: v_readlane_b32 s40, v40, 8 1076; GISEL-NEXT: v_readlane_b32 s39, v40, 7 1077; GISEL-NEXT: v_readlane_b32 s38, v40, 6 1078; GISEL-NEXT: v_readlane_b32 s37, v40, 5 1079; GISEL-NEXT: v_readlane_b32 s36, v40, 4 1080; GISEL-NEXT: v_readlane_b32 s35, v40, 3 1081; GISEL-NEXT: v_readlane_b32 s34, v40, 2 1082; GISEL-NEXT: v_readlane_b32 s31, v40, 1 1083; GISEL-NEXT: v_readlane_b32 s30, v40, 0 1084; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1085; GISEL-NEXT: v_readlane_b32 s33, v40, 20 1086; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1087; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1088; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1089; GISEL-NEXT: s_waitcnt vmcnt(0) 1090; GISEL-NEXT: s_setpc_b64 s[30:31] 1091bb0: 1092 br i1 %cond, label %bb1, label %bb2 1093 1094bb1: 1095 call void %fptr() 1096 br label %bb2 1097 1098bb2: 1099 ret void 1100} 1101
; Indirect amdgpu_gfx call through a function pointer held in VGPRs (v0/v1), passing the
; constant 123 as an inreg argument. The expected output materializes 0x7b in s4 once, ahead of
; the loop header .LBB6_1, and the loop body itself contains only the readfirstlane / compare /
; s_swappc_b64 sequence.
1102define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { 1103; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: 1104; GCN: ; %bb.0: 1105; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1107; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1108; GCN-NEXT: s_mov_b64 exec, s[4:5] 1109; GCN-NEXT: v_writelane_b32 v40, s33, 32 1110; GCN-NEXT: s_mov_b32 s33, s32 1111; GCN-NEXT: s_addk_i32 s32, 0x400 1112; GCN-NEXT: v_writelane_b32 v40, s30, 0 1113; GCN-NEXT: v_writelane_b32 v40, s31, 1 1114; GCN-NEXT: v_writelane_b32 v40, s34, 2 1115; GCN-NEXT: v_writelane_b32 v40, s35, 3 1116; GCN-NEXT: v_writelane_b32 v40, s36, 4 1117; GCN-NEXT: v_writelane_b32 v40, s37, 5 1118; GCN-NEXT: v_writelane_b32 v40, s38, 6 1119; GCN-NEXT: v_writelane_b32 v40, s39, 7 1120; GCN-NEXT: v_writelane_b32 v40, s40, 8 1121; GCN-NEXT: v_writelane_b32 v40, s41, 9 1122; GCN-NEXT: v_writelane_b32 v40, s42, 10 1123; GCN-NEXT: v_writelane_b32 v40, s43, 11 1124; GCN-NEXT: v_writelane_b32 v40, s44, 12 1125; 
GCN-NEXT: v_writelane_b32 v40, s45, 13 1126; GCN-NEXT: v_writelane_b32 v40, s46, 14 1127; GCN-NEXT: v_writelane_b32 v40, s47, 15 1128; GCN-NEXT: v_writelane_b32 v40, s48, 16 1129; GCN-NEXT: v_writelane_b32 v40, s49, 17 1130; GCN-NEXT: v_writelane_b32 v40, s50, 18 1131; GCN-NEXT: v_writelane_b32 v40, s51, 19 1132; GCN-NEXT: v_writelane_b32 v40, s52, 20 1133; GCN-NEXT: v_writelane_b32 v40, s53, 21 1134; GCN-NEXT: v_writelane_b32 v40, s54, 22 1135; GCN-NEXT: v_writelane_b32 v40, s55, 23 1136; GCN-NEXT: v_writelane_b32 v40, s56, 24 1137; GCN-NEXT: v_writelane_b32 v40, s57, 25 1138; GCN-NEXT: v_writelane_b32 v40, s58, 26 1139; GCN-NEXT: v_writelane_b32 v40, s59, 27 1140; GCN-NEXT: v_writelane_b32 v40, s60, 28 1141; GCN-NEXT: v_writelane_b32 v40, s61, 29 1142; GCN-NEXT: v_writelane_b32 v40, s62, 30 1143; GCN-NEXT: v_writelane_b32 v40, s63, 31 1144; GCN-NEXT: s_mov_b64 s[6:7], exec 1145; GCN-NEXT: s_movk_i32 s4, 0x7b 1146; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 1147; GCN-NEXT: v_readfirstlane_b32 s8, v0 1148; GCN-NEXT: v_readfirstlane_b32 s9, v1 1149; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] 1150; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc 1151; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] 1152; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 1153; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] 1154; GCN-NEXT: s_cbranch_execnz .LBB6_1 1155; GCN-NEXT: ; %bb.2: 1156; GCN-NEXT: s_mov_b64 exec, s[6:7] 1157; GCN-NEXT: v_readlane_b32 s63, v40, 31 1158; GCN-NEXT: v_readlane_b32 s62, v40, 30 1159; GCN-NEXT: v_readlane_b32 s61, v40, 29 1160; GCN-NEXT: v_readlane_b32 s60, v40, 28 1161; GCN-NEXT: v_readlane_b32 s59, v40, 27 1162; GCN-NEXT: v_readlane_b32 s58, v40, 26 1163; GCN-NEXT: v_readlane_b32 s57, v40, 25 1164; GCN-NEXT: v_readlane_b32 s56, v40, 24 1165; GCN-NEXT: v_readlane_b32 s55, v40, 23 1166; GCN-NEXT: v_readlane_b32 s54, v40, 22 1167; GCN-NEXT: v_readlane_b32 s53, v40, 21 1168; GCN-NEXT: v_readlane_b32 s52, v40, 20 1169; GCN-NEXT: v_readlane_b32 s51, v40, 19 1170; 
GCN-NEXT: v_readlane_b32 s50, v40, 18 1171; GCN-NEXT: v_readlane_b32 s49, v40, 17 1172; GCN-NEXT: v_readlane_b32 s48, v40, 16 1173; GCN-NEXT: v_readlane_b32 s47, v40, 15 1174; GCN-NEXT: v_readlane_b32 s46, v40, 14 1175; GCN-NEXT: v_readlane_b32 s45, v40, 13 1176; GCN-NEXT: v_readlane_b32 s44, v40, 12 1177; GCN-NEXT: v_readlane_b32 s43, v40, 11 1178; GCN-NEXT: v_readlane_b32 s42, v40, 10 1179; GCN-NEXT: v_readlane_b32 s41, v40, 9 1180; GCN-NEXT: v_readlane_b32 s40, v40, 8 1181; GCN-NEXT: v_readlane_b32 s39, v40, 7 1182; GCN-NEXT: v_readlane_b32 s38, v40, 6 1183; GCN-NEXT: v_readlane_b32 s37, v40, 5 1184; GCN-NEXT: v_readlane_b32 s36, v40, 4 1185; GCN-NEXT: v_readlane_b32 s35, v40, 3 1186; GCN-NEXT: v_readlane_b32 s34, v40, 2 1187; GCN-NEXT: v_readlane_b32 s31, v40, 1 1188; GCN-NEXT: v_readlane_b32 s30, v40, 0 1189; GCN-NEXT: s_addk_i32 s32, 0xfc00 1190; GCN-NEXT: v_readlane_b32 s33, v40, 32 1191; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1192; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1193; GCN-NEXT: s_mov_b64 exec, s[4:5] 1194; GCN-NEXT: s_waitcnt vmcnt(0) 1195; GCN-NEXT: s_setpc_b64 s[30:31] 1196; 1197; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: 1198; GISEL: ; %bb.0: 1199; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1200; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1201; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1202; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1203; GISEL-NEXT: v_writelane_b32 v40, s33, 32 1204; GISEL-NEXT: s_mov_b32 s33, s32 1205; GISEL-NEXT: s_addk_i32 s32, 0x400 1206; GISEL-NEXT: v_writelane_b32 v40, s30, 0 1207; GISEL-NEXT: v_writelane_b32 v40, s31, 1 1208; GISEL-NEXT: v_writelane_b32 v40, s34, 2 1209; GISEL-NEXT: v_writelane_b32 v40, s35, 3 1210; GISEL-NEXT: v_writelane_b32 v40, s36, 4 1211; GISEL-NEXT: v_writelane_b32 v40, s37, 5 1212; GISEL-NEXT: v_writelane_b32 v40, s38, 6 1213; GISEL-NEXT: v_writelane_b32 v40, s39, 7 1214; GISEL-NEXT: v_writelane_b32 v40, s40, 8 1215; 
GISEL-NEXT: v_writelane_b32 v40, s41, 9 1216; GISEL-NEXT: v_writelane_b32 v40, s42, 10 1217; GISEL-NEXT: v_writelane_b32 v40, s43, 11 1218; GISEL-NEXT: v_writelane_b32 v40, s44, 12 1219; GISEL-NEXT: v_writelane_b32 v40, s45, 13 1220; GISEL-NEXT: v_writelane_b32 v40, s46, 14 1221; GISEL-NEXT: v_writelane_b32 v40, s47, 15 1222; GISEL-NEXT: v_writelane_b32 v40, s48, 16 1223; GISEL-NEXT: v_writelane_b32 v40, s49, 17 1224; GISEL-NEXT: v_writelane_b32 v40, s50, 18 1225; GISEL-NEXT: v_writelane_b32 v40, s51, 19 1226; GISEL-NEXT: v_writelane_b32 v40, s52, 20 1227; GISEL-NEXT: v_writelane_b32 v40, s53, 21 1228; GISEL-NEXT: v_writelane_b32 v40, s54, 22 1229; GISEL-NEXT: v_writelane_b32 v40, s55, 23 1230; GISEL-NEXT: v_writelane_b32 v40, s56, 24 1231; GISEL-NEXT: v_writelane_b32 v40, s57, 25 1232; GISEL-NEXT: v_writelane_b32 v40, s58, 26 1233; GISEL-NEXT: v_writelane_b32 v40, s59, 27 1234; GISEL-NEXT: v_writelane_b32 v40, s60, 28 1235; GISEL-NEXT: v_writelane_b32 v40, s61, 29 1236; GISEL-NEXT: v_writelane_b32 v40, s62, 30 1237; GISEL-NEXT: v_writelane_b32 v40, s63, 31 1238; GISEL-NEXT: s_mov_b64 s[6:7], exec 1239; GISEL-NEXT: s_movk_i32 s4, 0x7b 1240; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 1241; GISEL-NEXT: v_readfirstlane_b32 s8, v0 1242; GISEL-NEXT: v_readfirstlane_b32 s9, v1 1243; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] 1244; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc 1245; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] 1246; GISEL-NEXT: ; implicit-def: $vgpr0 1247; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] 1248; GISEL-NEXT: s_cbranch_execnz .LBB6_1 1249; GISEL-NEXT: ; %bb.2: 1250; GISEL-NEXT: s_mov_b64 exec, s[6:7] 1251; GISEL-NEXT: v_readlane_b32 s63, v40, 31 1252; GISEL-NEXT: v_readlane_b32 s62, v40, 30 1253; GISEL-NEXT: v_readlane_b32 s61, v40, 29 1254; GISEL-NEXT: v_readlane_b32 s60, v40, 28 1255; GISEL-NEXT: v_readlane_b32 s59, v40, 27 1256; GISEL-NEXT: v_readlane_b32 s58, v40, 26 1257; GISEL-NEXT: v_readlane_b32 s57, v40, 25 1258; 
GISEL-NEXT: v_readlane_b32 s56, v40, 24 1259; GISEL-NEXT: v_readlane_b32 s55, v40, 23 1260; GISEL-NEXT: v_readlane_b32 s54, v40, 22 1261; GISEL-NEXT: v_readlane_b32 s53, v40, 21 1262; GISEL-NEXT: v_readlane_b32 s52, v40, 20 1263; GISEL-NEXT: v_readlane_b32 s51, v40, 19 1264; GISEL-NEXT: v_readlane_b32 s50, v40, 18 1265; GISEL-NEXT: v_readlane_b32 s49, v40, 17 1266; GISEL-NEXT: v_readlane_b32 s48, v40, 16 1267; GISEL-NEXT: v_readlane_b32 s47, v40, 15 1268; GISEL-NEXT: v_readlane_b32 s46, v40, 14 1269; GISEL-NEXT: v_readlane_b32 s45, v40, 13 1270; GISEL-NEXT: v_readlane_b32 s44, v40, 12 1271; GISEL-NEXT: v_readlane_b32 s43, v40, 11 1272; GISEL-NEXT: v_readlane_b32 s42, v40, 10 1273; GISEL-NEXT: v_readlane_b32 s41, v40, 9 1274; GISEL-NEXT: v_readlane_b32 s40, v40, 8 1275; GISEL-NEXT: v_readlane_b32 s39, v40, 7 1276; GISEL-NEXT: v_readlane_b32 s38, v40, 6 1277; GISEL-NEXT: v_readlane_b32 s37, v40, 5 1278; GISEL-NEXT: v_readlane_b32 s36, v40, 4 1279; GISEL-NEXT: v_readlane_b32 s35, v40, 3 1280; GISEL-NEXT: v_readlane_b32 s34, v40, 2 1281; GISEL-NEXT: v_readlane_b32 s31, v40, 1 1282; GISEL-NEXT: v_readlane_b32 s30, v40, 0 1283; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1284; GISEL-NEXT: v_readlane_b32 s33, v40, 32 1285; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1286; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1287; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1288; GISEL-NEXT: s_waitcnt vmcnt(0) 1289; GISEL-NEXT: s_setpc_b64 s[30:31] 1290 call amdgpu_gfx void %fptr(i32 inreg 123) 1291 ret void 1292} 1293
; Passes %i to an indirect amdgpu_gfx call (function pointer in v1/v2) and returns the same %i
; afterwards. The expected output copies %i from v0 into v41 before the loop (v41 is spilled and
; reloaded around the function body), copies v41 back into v0 on every iteration of the loop at
; .LBB7_1, and copies it into v0 once more after the loop for the return value.
1294define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) { 1295; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: 1296; GCN: ; %bb.0: 1297; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1298; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1299; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1300; GCN-NEXT: s_mov_b64 exec, s[4:5] 1301; GCN-NEXT: v_writelane_b32 v40, s33, 32 1302; GCN-NEXT: 
s_mov_b32 s33, s32 1303; GCN-NEXT: s_addk_i32 s32, 0x400 1304; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill 1305; GCN-NEXT: v_writelane_b32 v40, s30, 0 1306; GCN-NEXT: v_writelane_b32 v40, s31, 1 1307; GCN-NEXT: v_writelane_b32 v40, s34, 2 1308; GCN-NEXT: v_writelane_b32 v40, s35, 3 1309; GCN-NEXT: v_writelane_b32 v40, s36, 4 1310; GCN-NEXT: v_writelane_b32 v40, s37, 5 1311; GCN-NEXT: v_writelane_b32 v40, s38, 6 1312; GCN-NEXT: v_writelane_b32 v40, s39, 7 1313; GCN-NEXT: v_writelane_b32 v40, s40, 8 1314; GCN-NEXT: v_writelane_b32 v40, s41, 9 1315; GCN-NEXT: v_writelane_b32 v40, s42, 10 1316; GCN-NEXT: v_writelane_b32 v40, s43, 11 1317; GCN-NEXT: v_writelane_b32 v40, s44, 12 1318; GCN-NEXT: v_writelane_b32 v40, s45, 13 1319; GCN-NEXT: v_writelane_b32 v40, s46, 14 1320; GCN-NEXT: v_writelane_b32 v40, s47, 15 1321; GCN-NEXT: v_writelane_b32 v40, s48, 16 1322; GCN-NEXT: v_writelane_b32 v40, s49, 17 1323; GCN-NEXT: v_writelane_b32 v40, s50, 18 1324; GCN-NEXT: v_writelane_b32 v40, s51, 19 1325; GCN-NEXT: v_writelane_b32 v40, s52, 20 1326; GCN-NEXT: v_writelane_b32 v40, s53, 21 1327; GCN-NEXT: v_writelane_b32 v40, s54, 22 1328; GCN-NEXT: v_writelane_b32 v40, s55, 23 1329; GCN-NEXT: v_writelane_b32 v40, s56, 24 1330; GCN-NEXT: v_writelane_b32 v40, s57, 25 1331; GCN-NEXT: v_writelane_b32 v40, s58, 26 1332; GCN-NEXT: v_writelane_b32 v40, s59, 27 1333; GCN-NEXT: v_writelane_b32 v40, s60, 28 1334; GCN-NEXT: v_writelane_b32 v40, s61, 29 1335; GCN-NEXT: v_writelane_b32 v40, s62, 30 1336; GCN-NEXT: v_writelane_b32 v40, s63, 31 1337; GCN-NEXT: v_mov_b32_e32 v41, v0 1338; GCN-NEXT: s_mov_b64 s[4:5], exec 1339; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 1340; GCN-NEXT: v_readfirstlane_b32 s6, v1 1341; GCN-NEXT: v_readfirstlane_b32 s7, v2 1342; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] 1343; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc 1344; GCN-NEXT: v_mov_b32_e32 v0, v41 1345; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] 1346; GCN-NEXT: ; 
implicit-def: $vgpr1_vgpr2 1347; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] 1348; GCN-NEXT: s_cbranch_execnz .LBB7_1 1349; GCN-NEXT: ; %bb.2: 1350; GCN-NEXT: s_mov_b64 exec, s[4:5] 1351; GCN-NEXT: v_mov_b32_e32 v0, v41 1352; GCN-NEXT: v_readlane_b32 s63, v40, 31 1353; GCN-NEXT: v_readlane_b32 s62, v40, 30 1354; GCN-NEXT: v_readlane_b32 s61, v40, 29 1355; GCN-NEXT: v_readlane_b32 s60, v40, 28 1356; GCN-NEXT: v_readlane_b32 s59, v40, 27 1357; GCN-NEXT: v_readlane_b32 s58, v40, 26 1358; GCN-NEXT: v_readlane_b32 s57, v40, 25 1359; GCN-NEXT: v_readlane_b32 s56, v40, 24 1360; GCN-NEXT: v_readlane_b32 s55, v40, 23 1361; GCN-NEXT: v_readlane_b32 s54, v40, 22 1362; GCN-NEXT: v_readlane_b32 s53, v40, 21 1363; GCN-NEXT: v_readlane_b32 s52, v40, 20 1364; GCN-NEXT: v_readlane_b32 s51, v40, 19 1365; GCN-NEXT: v_readlane_b32 s50, v40, 18 1366; GCN-NEXT: v_readlane_b32 s49, v40, 17 1367; GCN-NEXT: v_readlane_b32 s48, v40, 16 1368; GCN-NEXT: v_readlane_b32 s47, v40, 15 1369; GCN-NEXT: v_readlane_b32 s46, v40, 14 1370; GCN-NEXT: v_readlane_b32 s45, v40, 13 1371; GCN-NEXT: v_readlane_b32 s44, v40, 12 1372; GCN-NEXT: v_readlane_b32 s43, v40, 11 1373; GCN-NEXT: v_readlane_b32 s42, v40, 10 1374; GCN-NEXT: v_readlane_b32 s41, v40, 9 1375; GCN-NEXT: v_readlane_b32 s40, v40, 8 1376; GCN-NEXT: v_readlane_b32 s39, v40, 7 1377; GCN-NEXT: v_readlane_b32 s38, v40, 6 1378; GCN-NEXT: v_readlane_b32 s37, v40, 5 1379; GCN-NEXT: v_readlane_b32 s36, v40, 4 1380; GCN-NEXT: v_readlane_b32 s35, v40, 3 1381; GCN-NEXT: v_readlane_b32 s34, v40, 2 1382; GCN-NEXT: v_readlane_b32 s31, v40, 1 1383; GCN-NEXT: v_readlane_b32 s30, v40, 0 1384; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload 1385; GCN-NEXT: s_addk_i32 s32, 0xfc00 1386; GCN-NEXT: v_readlane_b32 s33, v40, 32 1387; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1388; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1389; GCN-NEXT: s_mov_b64 exec, s[4:5] 1390; GCN-NEXT: s_waitcnt vmcnt(0) 1391; GCN-NEXT: 
s_setpc_b64 s[30:31] 1392; 1393; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: 1394; GISEL: ; %bb.0: 1395; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1396; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1397; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1398; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1399; GISEL-NEXT: v_writelane_b32 v40, s33, 32 1400; GISEL-NEXT: s_mov_b32 s33, s32 1401; GISEL-NEXT: s_addk_i32 s32, 0x400 1402; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill 1403; GISEL-NEXT: v_writelane_b32 v40, s30, 0 1404; GISEL-NEXT: v_writelane_b32 v40, s31, 1 1405; GISEL-NEXT: v_writelane_b32 v40, s34, 2 1406; GISEL-NEXT: v_writelane_b32 v40, s35, 3 1407; GISEL-NEXT: v_writelane_b32 v40, s36, 4 1408; GISEL-NEXT: v_writelane_b32 v40, s37, 5 1409; GISEL-NEXT: v_writelane_b32 v40, s38, 6 1410; GISEL-NEXT: v_writelane_b32 v40, s39, 7 1411; GISEL-NEXT: v_writelane_b32 v40, s40, 8 1412; GISEL-NEXT: v_writelane_b32 v40, s41, 9 1413; GISEL-NEXT: v_writelane_b32 v40, s42, 10 1414; GISEL-NEXT: v_writelane_b32 v40, s43, 11 1415; GISEL-NEXT: v_writelane_b32 v40, s44, 12 1416; GISEL-NEXT: v_writelane_b32 v40, s45, 13 1417; GISEL-NEXT: v_writelane_b32 v40, s46, 14 1418; GISEL-NEXT: v_writelane_b32 v40, s47, 15 1419; GISEL-NEXT: v_writelane_b32 v40, s48, 16 1420; GISEL-NEXT: v_writelane_b32 v40, s49, 17 1421; GISEL-NEXT: v_writelane_b32 v40, s50, 18 1422; GISEL-NEXT: v_writelane_b32 v40, s51, 19 1423; GISEL-NEXT: v_writelane_b32 v40, s52, 20 1424; GISEL-NEXT: v_writelane_b32 v40, s53, 21 1425; GISEL-NEXT: v_writelane_b32 v40, s54, 22 1426; GISEL-NEXT: v_writelane_b32 v40, s55, 23 1427; GISEL-NEXT: v_writelane_b32 v40, s56, 24 1428; GISEL-NEXT: v_writelane_b32 v40, s57, 25 1429; GISEL-NEXT: v_writelane_b32 v40, s58, 26 1430; GISEL-NEXT: v_writelane_b32 v40, s59, 27 1431; GISEL-NEXT: v_writelane_b32 v40, s60, 28 1432; GISEL-NEXT: v_writelane_b32 v40, s61, 29 1433; GISEL-NEXT: v_writelane_b32 v40, s62, 30 
1434; GISEL-NEXT: v_writelane_b32 v40, s63, 31 1435; GISEL-NEXT: v_mov_b32_e32 v41, v0 1436; GISEL-NEXT: s_mov_b64 s[4:5], exec 1437; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 1438; GISEL-NEXT: v_readfirstlane_b32 s6, v1 1439; GISEL-NEXT: v_readfirstlane_b32 s7, v2 1440; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] 1441; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc 1442; GISEL-NEXT: v_mov_b32_e32 v0, v41 1443; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] 1444; GISEL-NEXT: ; implicit-def: $vgpr1 1445; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] 1446; GISEL-NEXT: s_cbranch_execnz .LBB7_1 1447; GISEL-NEXT: ; %bb.2: 1448; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1449; GISEL-NEXT: v_mov_b32_e32 v0, v41 1450; GISEL-NEXT: v_readlane_b32 s63, v40, 31 1451; GISEL-NEXT: v_readlane_b32 s62, v40, 30 1452; GISEL-NEXT: v_readlane_b32 s61, v40, 29 1453; GISEL-NEXT: v_readlane_b32 s60, v40, 28 1454; GISEL-NEXT: v_readlane_b32 s59, v40, 27 1455; GISEL-NEXT: v_readlane_b32 s58, v40, 26 1456; GISEL-NEXT: v_readlane_b32 s57, v40, 25 1457; GISEL-NEXT: v_readlane_b32 s56, v40, 24 1458; GISEL-NEXT: v_readlane_b32 s55, v40, 23 1459; GISEL-NEXT: v_readlane_b32 s54, v40, 22 1460; GISEL-NEXT: v_readlane_b32 s53, v40, 21 1461; GISEL-NEXT: v_readlane_b32 s52, v40, 20 1462; GISEL-NEXT: v_readlane_b32 s51, v40, 19 1463; GISEL-NEXT: v_readlane_b32 s50, v40, 18 1464; GISEL-NEXT: v_readlane_b32 s49, v40, 17 1465; GISEL-NEXT: v_readlane_b32 s48, v40, 16 1466; GISEL-NEXT: v_readlane_b32 s47, v40, 15 1467; GISEL-NEXT: v_readlane_b32 s46, v40, 14 1468; GISEL-NEXT: v_readlane_b32 s45, v40, 13 1469; GISEL-NEXT: v_readlane_b32 s44, v40, 12 1470; GISEL-NEXT: v_readlane_b32 s43, v40, 11 1471; GISEL-NEXT: v_readlane_b32 s42, v40, 10 1472; GISEL-NEXT: v_readlane_b32 s41, v40, 9 1473; GISEL-NEXT: v_readlane_b32 s40, v40, 8 1474; GISEL-NEXT: v_readlane_b32 s39, v40, 7 1475; GISEL-NEXT: v_readlane_b32 s38, v40, 6 1476; GISEL-NEXT: v_readlane_b32 s37, v40, 5 1477; GISEL-NEXT: v_readlane_b32 s36, v40, 4 1478; 
GISEL-NEXT: v_readlane_b32 s35, v40, 3 1479; GISEL-NEXT: v_readlane_b32 s34, v40, 2 1480; GISEL-NEXT: v_readlane_b32 s31, v40, 1 1481; GISEL-NEXT: v_readlane_b32 s30, v40, 0 1482; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload 1483; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1484; GISEL-NEXT: v_readlane_b32 s33, v40, 32 1485; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1486; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1487; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1488; GISEL-NEXT: s_waitcnt vmcnt(0) 1489; GISEL-NEXT: s_setpc_b64 s[30:31] 1490 call amdgpu_gfx void %fptr(i32 %i) 1491 ret i32 %i 1492} 1493 1494; Use a variable inside a waterfall loop and use the return variable after the loop. 1495; TODO: The argument and return variable could be in the same physical register, but the register 1496; allocator is not able to do that because the return value clashes with the live range of an 1497; IMPLICIT_DEF of the argument. 1498define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) { 1499; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: 1500; GCN: ; %bb.0: 1501; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1502; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1503; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1504; GCN-NEXT: s_mov_b64 exec, s[4:5] 1505; GCN-NEXT: v_writelane_b32 v40, s33, 32 1506; GCN-NEXT: s_mov_b32 s33, s32 1507; GCN-NEXT: s_addk_i32 s32, 0x400 1508; GCN-NEXT: v_writelane_b32 v40, s30, 0 1509; GCN-NEXT: v_writelane_b32 v40, s31, 1 1510; GCN-NEXT: v_writelane_b32 v40, s34, 2 1511; GCN-NEXT: v_writelane_b32 v40, s35, 3 1512; GCN-NEXT: v_writelane_b32 v40, s36, 4 1513; GCN-NEXT: v_writelane_b32 v40, s37, 5 1514; GCN-NEXT: v_writelane_b32 v40, s38, 6 1515; GCN-NEXT: v_writelane_b32 v40, s39, 7 1516; GCN-NEXT: v_writelane_b32 v40, s40, 8 1517; GCN-NEXT: v_writelane_b32 v40, s41, 9 1518; GCN-NEXT: v_writelane_b32 v40, s42, 10 1519; GCN-NEXT: 
v_writelane_b32 v40, s43, 11 1520; GCN-NEXT: v_writelane_b32 v40, s44, 12 1521; GCN-NEXT: v_writelane_b32 v40, s45, 13 1522; GCN-NEXT: v_writelane_b32 v40, s46, 14 1523; GCN-NEXT: v_writelane_b32 v40, s47, 15 1524; GCN-NEXT: v_writelane_b32 v40, s48, 16 1525; GCN-NEXT: v_writelane_b32 v40, s49, 17 1526; GCN-NEXT: v_writelane_b32 v40, s50, 18 1527; GCN-NEXT: v_writelane_b32 v40, s51, 19 1528; GCN-NEXT: v_writelane_b32 v40, s52, 20 1529; GCN-NEXT: v_writelane_b32 v40, s53, 21 1530; GCN-NEXT: v_writelane_b32 v40, s54, 22 1531; GCN-NEXT: v_writelane_b32 v40, s55, 23 1532; GCN-NEXT: v_writelane_b32 v40, s56, 24 1533; GCN-NEXT: v_writelane_b32 v40, s57, 25 1534; GCN-NEXT: v_writelane_b32 v40, s58, 26 1535; GCN-NEXT: v_writelane_b32 v40, s59, 27 1536; GCN-NEXT: v_writelane_b32 v40, s60, 28 1537; GCN-NEXT: v_writelane_b32 v40, s61, 29 1538; GCN-NEXT: v_writelane_b32 v40, s62, 30 1539; GCN-NEXT: v_writelane_b32 v40, s63, 31 1540; GCN-NEXT: s_mov_b64 s[4:5], exec 1541; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1542; GCN-NEXT: v_readfirstlane_b32 s6, v1 1543; GCN-NEXT: v_readfirstlane_b32 s7, v2 1544; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] 1545; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc 1546; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] 1547; GCN-NEXT: v_mov_b32_e32 v3, v0 1548; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 1549; GCN-NEXT: ; implicit-def: $vgpr0 1550; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] 1551; GCN-NEXT: s_cbranch_execnz .LBB8_1 1552; GCN-NEXT: ; %bb.2: 1553; GCN-NEXT: s_mov_b64 exec, s[4:5] 1554; GCN-NEXT: v_mov_b32_e32 v0, v3 1555; GCN-NEXT: v_readlane_b32 s63, v40, 31 1556; GCN-NEXT: v_readlane_b32 s62, v40, 30 1557; GCN-NEXT: v_readlane_b32 s61, v40, 29 1558; GCN-NEXT: v_readlane_b32 s60, v40, 28 1559; GCN-NEXT: v_readlane_b32 s59, v40, 27 1560; GCN-NEXT: v_readlane_b32 s58, v40, 26 1561; GCN-NEXT: v_readlane_b32 s57, v40, 25 1562; GCN-NEXT: v_readlane_b32 s56, v40, 24 1563; GCN-NEXT: v_readlane_b32 s55, v40, 23 1564; GCN-NEXT: v_readlane_b32 
s54, v40, 22 1565; GCN-NEXT: v_readlane_b32 s53, v40, 21 1566; GCN-NEXT: v_readlane_b32 s52, v40, 20 1567; GCN-NEXT: v_readlane_b32 s51, v40, 19 1568; GCN-NEXT: v_readlane_b32 s50, v40, 18 1569; GCN-NEXT: v_readlane_b32 s49, v40, 17 1570; GCN-NEXT: v_readlane_b32 s48, v40, 16 1571; GCN-NEXT: v_readlane_b32 s47, v40, 15 1572; GCN-NEXT: v_readlane_b32 s46, v40, 14 1573; GCN-NEXT: v_readlane_b32 s45, v40, 13 1574; GCN-NEXT: v_readlane_b32 s44, v40, 12 1575; GCN-NEXT: v_readlane_b32 s43, v40, 11 1576; GCN-NEXT: v_readlane_b32 s42, v40, 10 1577; GCN-NEXT: v_readlane_b32 s41, v40, 9 1578; GCN-NEXT: v_readlane_b32 s40, v40, 8 1579; GCN-NEXT: v_readlane_b32 s39, v40, 7 1580; GCN-NEXT: v_readlane_b32 s38, v40, 6 1581; GCN-NEXT: v_readlane_b32 s37, v40, 5 1582; GCN-NEXT: v_readlane_b32 s36, v40, 4 1583; GCN-NEXT: v_readlane_b32 s35, v40, 3 1584; GCN-NEXT: v_readlane_b32 s34, v40, 2 1585; GCN-NEXT: v_readlane_b32 s31, v40, 1 1586; GCN-NEXT: v_readlane_b32 s30, v40, 0 1587; GCN-NEXT: s_addk_i32 s32, 0xfc00 1588; GCN-NEXT: v_readlane_b32 s33, v40, 32 1589; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1590; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1591; GCN-NEXT: s_mov_b64 exec, s[4:5] 1592; GCN-NEXT: s_waitcnt vmcnt(0) 1593; GCN-NEXT: s_setpc_b64 s[30:31] 1594; 1595; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: 1596; GISEL: ; %bb.0: 1597; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1598; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1599; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1600; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1601; GISEL-NEXT: v_writelane_b32 v40, s33, 32 1602; GISEL-NEXT: s_mov_b32 s33, s32 1603; GISEL-NEXT: s_addk_i32 s32, 0x400 1604; GISEL-NEXT: v_writelane_b32 v40, s30, 0 1605; GISEL-NEXT: v_writelane_b32 v40, s31, 1 1606; GISEL-NEXT: v_writelane_b32 v40, s34, 2 1607; GISEL-NEXT: v_writelane_b32 v40, s35, 3 1608; GISEL-NEXT: v_writelane_b32 v40, s36, 4 1609; GISEL-NEXT: v_writelane_b32 v40, 
s37, 5 1610; GISEL-NEXT: v_writelane_b32 v40, s38, 6 1611; GISEL-NEXT: v_writelane_b32 v40, s39, 7 1612; GISEL-NEXT: v_writelane_b32 v40, s40, 8 1613; GISEL-NEXT: v_writelane_b32 v40, s41, 9 1614; GISEL-NEXT: v_writelane_b32 v40, s42, 10 1615; GISEL-NEXT: v_writelane_b32 v40, s43, 11 1616; GISEL-NEXT: v_writelane_b32 v40, s44, 12 1617; GISEL-NEXT: v_writelane_b32 v40, s45, 13 1618; GISEL-NEXT: v_writelane_b32 v40, s46, 14 1619; GISEL-NEXT: v_writelane_b32 v40, s47, 15 1620; GISEL-NEXT: v_writelane_b32 v40, s48, 16 1621; GISEL-NEXT: v_writelane_b32 v40, s49, 17 1622; GISEL-NEXT: v_writelane_b32 v40, s50, 18 1623; GISEL-NEXT: v_writelane_b32 v40, s51, 19 1624; GISEL-NEXT: v_writelane_b32 v40, s52, 20 1625; GISEL-NEXT: v_writelane_b32 v40, s53, 21 1626; GISEL-NEXT: v_writelane_b32 v40, s54, 22 1627; GISEL-NEXT: v_writelane_b32 v40, s55, 23 1628; GISEL-NEXT: v_writelane_b32 v40, s56, 24 1629; GISEL-NEXT: v_writelane_b32 v40, s57, 25 1630; GISEL-NEXT: v_writelane_b32 v40, s58, 26 1631; GISEL-NEXT: v_writelane_b32 v40, s59, 27 1632; GISEL-NEXT: v_writelane_b32 v40, s60, 28 1633; GISEL-NEXT: v_writelane_b32 v40, s61, 29 1634; GISEL-NEXT: v_writelane_b32 v40, s62, 30 1635; GISEL-NEXT: v_writelane_b32 v40, s63, 31 1636; GISEL-NEXT: s_mov_b64 s[4:5], exec 1637; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1638; GISEL-NEXT: v_readfirstlane_b32 s8, v1 1639; GISEL-NEXT: v_readfirstlane_b32 s9, v2 1640; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] 1641; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc 1642; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] 1643; GISEL-NEXT: v_mov_b32_e32 v2, v0 1644; GISEL-NEXT: ; implicit-def: $vgpr1 1645; GISEL-NEXT: ; implicit-def: $vgpr0 1646; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7] 1647; GISEL-NEXT: s_cbranch_execnz .LBB8_1 1648; GISEL-NEXT: ; %bb.2: 1649; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1650; GISEL-NEXT: v_mov_b32_e32 v0, v2 1651; GISEL-NEXT: v_readlane_b32 s63, v40, 31 1652; GISEL-NEXT: v_readlane_b32 s62, v40, 30 1653; GISEL-NEXT: 
v_readlane_b32 s61, v40, 29 1654; GISEL-NEXT: v_readlane_b32 s60, v40, 28 1655; GISEL-NEXT: v_readlane_b32 s59, v40, 27 1656; GISEL-NEXT: v_readlane_b32 s58, v40, 26 1657; GISEL-NEXT: v_readlane_b32 s57, v40, 25 1658; GISEL-NEXT: v_readlane_b32 s56, v40, 24 1659; GISEL-NEXT: v_readlane_b32 s55, v40, 23 1660; GISEL-NEXT: v_readlane_b32 s54, v40, 22 1661; GISEL-NEXT: v_readlane_b32 s53, v40, 21 1662; GISEL-NEXT: v_readlane_b32 s52, v40, 20 1663; GISEL-NEXT: v_readlane_b32 s51, v40, 19 1664; GISEL-NEXT: v_readlane_b32 s50, v40, 18 1665; GISEL-NEXT: v_readlane_b32 s49, v40, 17 1666; GISEL-NEXT: v_readlane_b32 s48, v40, 16 1667; GISEL-NEXT: v_readlane_b32 s47, v40, 15 1668; GISEL-NEXT: v_readlane_b32 s46, v40, 14 1669; GISEL-NEXT: v_readlane_b32 s45, v40, 13 1670; GISEL-NEXT: v_readlane_b32 s44, v40, 12 1671; GISEL-NEXT: v_readlane_b32 s43, v40, 11 1672; GISEL-NEXT: v_readlane_b32 s42, v40, 10 1673; GISEL-NEXT: v_readlane_b32 s41, v40, 9 1674; GISEL-NEXT: v_readlane_b32 s40, v40, 8 1675; GISEL-NEXT: v_readlane_b32 s39, v40, 7 1676; GISEL-NEXT: v_readlane_b32 s38, v40, 6 1677; GISEL-NEXT: v_readlane_b32 s37, v40, 5 1678; GISEL-NEXT: v_readlane_b32 s36, v40, 4 1679; GISEL-NEXT: v_readlane_b32 s35, v40, 3 1680; GISEL-NEXT: v_readlane_b32 s34, v40, 2 1681; GISEL-NEXT: v_readlane_b32 s31, v40, 1 1682; GISEL-NEXT: v_readlane_b32 s30, v40, 0 1683; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1684; GISEL-NEXT: v_readlane_b32 s33, v40, 32 1685; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1686; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1687; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1688; GISEL-NEXT: s_waitcnt vmcnt(0) 1689; GISEL-NEXT: s_setpc_b64 s[30:31] 1690 %ret = call amdgpu_gfx i32 %fptr(i32 %i) 1691 ret i32 %ret 1692} 1693 1694; Calling a vgpr can never be a tail call. 
; The IR below marks the indirect call as `tail call`, but the expected output for both RUN configurations still performs a regular s_swappc_b64 inside the .LBB9_1 loop, then restores the saved SGPRs and returns with s_setpc_b64 s[30:31].
; The assertions for this function are autogenerated by utils/update_llc_test_checks.py; regenerate them with that script instead of editing them by hand.
1695define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { 1696; GCN-LABEL: test_indirect_tail_call_vgpr_ptr: 1697; GCN: ; %bb.0: 1698; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1699; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1700; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1701; GCN-NEXT: s_mov_b64 exec, s[4:5] 1702; GCN-NEXT: v_writelane_b32 v40, s33, 32 1703; GCN-NEXT: s_mov_b32 s33, s32 1704; GCN-NEXT: s_addk_i32 s32, 0x400 1705; GCN-NEXT: v_writelane_b32 v40, s30, 0 1706; GCN-NEXT: v_writelane_b32 v40, s31, 1 1707; GCN-NEXT: v_writelane_b32 v40, s34, 2 1708; GCN-NEXT: v_writelane_b32 v40, s35, 3 1709; GCN-NEXT: v_writelane_b32 v40, s36, 4 1710; GCN-NEXT: v_writelane_b32 v40, s37, 5 1711; GCN-NEXT: v_writelane_b32 v40, s38, 6 1712; GCN-NEXT: v_writelane_b32 v40, s39, 7 1713; GCN-NEXT: v_writelane_b32 v40, s40, 8 1714; GCN-NEXT: v_writelane_b32 v40, s41, 9 1715; GCN-NEXT: v_writelane_b32 v40, s42, 10 1716; GCN-NEXT: v_writelane_b32 v40, s43, 11 1717; GCN-NEXT: v_writelane_b32 v40, s44, 12 1718; GCN-NEXT: v_writelane_b32 v40, s45, 13 1719; GCN-NEXT: v_writelane_b32 v40, s46, 14 1720; GCN-NEXT: v_writelane_b32 v40, s47, 15 1721; GCN-NEXT: v_writelane_b32 v40, s48, 16 1722; GCN-NEXT: v_writelane_b32 v40, s49, 17 1723; GCN-NEXT: v_writelane_b32 v40, s50, 18 1724; GCN-NEXT: v_writelane_b32 v40, s51, 19 1725; GCN-NEXT: v_writelane_b32 v40, s52, 20 1726; GCN-NEXT: v_writelane_b32 v40, s53, 21 1727; GCN-NEXT: v_writelane_b32 v40, s54, 22 1728; GCN-NEXT: v_writelane_b32 v40, s55, 23 1729; GCN-NEXT: v_writelane_b32 v40, s56, 24 1730; GCN-NEXT: v_writelane_b32 v40, s57, 25 1731; GCN-NEXT: v_writelane_b32 v40, s58, 26 1732; GCN-NEXT: v_writelane_b32 v40, s59, 27 1733; GCN-NEXT: v_writelane_b32 v40, s60, 28 1734; GCN-NEXT: v_writelane_b32 v40, s61, 29 1735; GCN-NEXT: v_writelane_b32 v40, s62, 30 1736; GCN-NEXT: v_writelane_b32 v40, s63, 31 1737; GCN-NEXT: s_mov_b64 s[4:5], exec 1738; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header:
Depth=1 1739; GCN-NEXT: v_readfirstlane_b32 s6, v0 1740; GCN-NEXT: v_readfirstlane_b32 s7, v1 1741; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] 1742; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc 1743; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] 1744; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 1745; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] 1746; GCN-NEXT: s_cbranch_execnz .LBB9_1 1747; GCN-NEXT: ; %bb.2: 1748; GCN-NEXT: s_mov_b64 exec, s[4:5] 1749; GCN-NEXT: v_readlane_b32 s63, v40, 31 1750; GCN-NEXT: v_readlane_b32 s62, v40, 30 1751; GCN-NEXT: v_readlane_b32 s61, v40, 29 1752; GCN-NEXT: v_readlane_b32 s60, v40, 28 1753; GCN-NEXT: v_readlane_b32 s59, v40, 27 1754; GCN-NEXT: v_readlane_b32 s58, v40, 26 1755; GCN-NEXT: v_readlane_b32 s57, v40, 25 1756; GCN-NEXT: v_readlane_b32 s56, v40, 24 1757; GCN-NEXT: v_readlane_b32 s55, v40, 23 1758; GCN-NEXT: v_readlane_b32 s54, v40, 22 1759; GCN-NEXT: v_readlane_b32 s53, v40, 21 1760; GCN-NEXT: v_readlane_b32 s52, v40, 20 1761; GCN-NEXT: v_readlane_b32 s51, v40, 19 1762; GCN-NEXT: v_readlane_b32 s50, v40, 18 1763; GCN-NEXT: v_readlane_b32 s49, v40, 17 1764; GCN-NEXT: v_readlane_b32 s48, v40, 16 1765; GCN-NEXT: v_readlane_b32 s47, v40, 15 1766; GCN-NEXT: v_readlane_b32 s46, v40, 14 1767; GCN-NEXT: v_readlane_b32 s45, v40, 13 1768; GCN-NEXT: v_readlane_b32 s44, v40, 12 1769; GCN-NEXT: v_readlane_b32 s43, v40, 11 1770; GCN-NEXT: v_readlane_b32 s42, v40, 10 1771; GCN-NEXT: v_readlane_b32 s41, v40, 9 1772; GCN-NEXT: v_readlane_b32 s40, v40, 8 1773; GCN-NEXT: v_readlane_b32 s39, v40, 7 1774; GCN-NEXT: v_readlane_b32 s38, v40, 6 1775; GCN-NEXT: v_readlane_b32 s37, v40, 5 1776; GCN-NEXT: v_readlane_b32 s36, v40, 4 1777; GCN-NEXT: v_readlane_b32 s35, v40, 3 1778; GCN-NEXT: v_readlane_b32 s34, v40, 2 1779; GCN-NEXT: v_readlane_b32 s31, v40, 1 1780; GCN-NEXT: v_readlane_b32 s30, v40, 0 1781; GCN-NEXT: s_addk_i32 s32, 0xfc00 1782; GCN-NEXT: v_readlane_b32 s33, v40, 32 1783; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1784; GCN-NEXT: buffer_load_dword v40,
off, s[0:3], s32 ; 4-byte Folded Reload 1785; GCN-NEXT: s_mov_b64 exec, s[4:5] 1786; GCN-NEXT: s_waitcnt vmcnt(0) 1787; GCN-NEXT: s_setpc_b64 s[30:31] 1788; 1789; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: 1790; GISEL: ; %bb.0: 1791; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1792; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1793; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1794; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1795; GISEL-NEXT: v_writelane_b32 v40, s33, 32 1796; GISEL-NEXT: s_mov_b32 s33, s32 1797; GISEL-NEXT: s_addk_i32 s32, 0x400 1798; GISEL-NEXT: v_writelane_b32 v40, s30, 0 1799; GISEL-NEXT: v_writelane_b32 v40, s31, 1 1800; GISEL-NEXT: v_writelane_b32 v40, s34, 2 1801; GISEL-NEXT: v_writelane_b32 v40, s35, 3 1802; GISEL-NEXT: v_writelane_b32 v40, s36, 4 1803; GISEL-NEXT: v_writelane_b32 v40, s37, 5 1804; GISEL-NEXT: v_writelane_b32 v40, s38, 6 1805; GISEL-NEXT: v_writelane_b32 v40, s39, 7 1806; GISEL-NEXT: v_writelane_b32 v40, s40, 8 1807; GISEL-NEXT: v_writelane_b32 v40, s41, 9 1808; GISEL-NEXT: v_writelane_b32 v40, s42, 10 1809; GISEL-NEXT: v_writelane_b32 v40, s43, 11 1810; GISEL-NEXT: v_writelane_b32 v40, s44, 12 1811; GISEL-NEXT: v_writelane_b32 v40, s45, 13 1812; GISEL-NEXT: v_writelane_b32 v40, s46, 14 1813; GISEL-NEXT: v_writelane_b32 v40, s47, 15 1814; GISEL-NEXT: v_writelane_b32 v40, s48, 16 1815; GISEL-NEXT: v_writelane_b32 v40, s49, 17 1816; GISEL-NEXT: v_writelane_b32 v40, s50, 18 1817; GISEL-NEXT: v_writelane_b32 v40, s51, 19 1818; GISEL-NEXT: v_writelane_b32 v40, s52, 20 1819; GISEL-NEXT: v_writelane_b32 v40, s53, 21 1820; GISEL-NEXT: v_writelane_b32 v40, s54, 22 1821; GISEL-NEXT: v_writelane_b32 v40, s55, 23 1822; GISEL-NEXT: v_writelane_b32 v40, s56, 24 1823; GISEL-NEXT: v_writelane_b32 v40, s57, 25 1824; GISEL-NEXT: v_writelane_b32 v40, s58, 26 1825; GISEL-NEXT: v_writelane_b32 v40, s59, 27 1826; GISEL-NEXT: v_writelane_b32 v40, s60, 28 1827; GISEL-NEXT: v_writelane_b32 v40, s61, 29 1828;
GISEL-NEXT: v_writelane_b32 v40, s62, 30 1829; GISEL-NEXT: v_writelane_b32 v40, s63, 31 1830; GISEL-NEXT: s_mov_b64 s[4:5], exec 1831; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 1832; GISEL-NEXT: v_readfirstlane_b32 s6, v0 1833; GISEL-NEXT: v_readfirstlane_b32 s7, v1 1834; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] 1835; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc 1836; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] 1837; GISEL-NEXT: ; implicit-def: $vgpr0 1838; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] 1839; GISEL-NEXT: s_cbranch_execnz .LBB9_1 1840; GISEL-NEXT: ; %bb.2: 1841; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1842; GISEL-NEXT: v_readlane_b32 s63, v40, 31 1843; GISEL-NEXT: v_readlane_b32 s62, v40, 30 1844; GISEL-NEXT: v_readlane_b32 s61, v40, 29 1845; GISEL-NEXT: v_readlane_b32 s60, v40, 28 1846; GISEL-NEXT: v_readlane_b32 s59, v40, 27 1847; GISEL-NEXT: v_readlane_b32 s58, v40, 26 1848; GISEL-NEXT: v_readlane_b32 s57, v40, 25 1849; GISEL-NEXT: v_readlane_b32 s56, v40, 24 1850; GISEL-NEXT: v_readlane_b32 s55, v40, 23 1851; GISEL-NEXT: v_readlane_b32 s54, v40, 22 1852; GISEL-NEXT: v_readlane_b32 s53, v40, 21 1853; GISEL-NEXT: v_readlane_b32 s52, v40, 20 1854; GISEL-NEXT: v_readlane_b32 s51, v40, 19 1855; GISEL-NEXT: v_readlane_b32 s50, v40, 18 1856; GISEL-NEXT: v_readlane_b32 s49, v40, 17 1857; GISEL-NEXT: v_readlane_b32 s48, v40, 16 1858; GISEL-NEXT: v_readlane_b32 s47, v40, 15 1859; GISEL-NEXT: v_readlane_b32 s46, v40, 14 1860; GISEL-NEXT: v_readlane_b32 s45, v40, 13 1861; GISEL-NEXT: v_readlane_b32 s44, v40, 12 1862; GISEL-NEXT: v_readlane_b32 s43, v40, 11 1863; GISEL-NEXT: v_readlane_b32 s42, v40, 10 1864; GISEL-NEXT: v_readlane_b32 s41, v40, 9 1865; GISEL-NEXT: v_readlane_b32 s40, v40, 8 1866; GISEL-NEXT: v_readlane_b32 s39, v40, 7 1867; GISEL-NEXT: v_readlane_b32 s38, v40, 6 1868; GISEL-NEXT: v_readlane_b32 s37, v40, 5 1869; GISEL-NEXT: v_readlane_b32 s36, v40, 4 1870; GISEL-NEXT: v_readlane_b32 s35, v40, 3 1871; GISEL-NEXT: v_readlane_b32 s34,
v40, 2 1872; GISEL-NEXT: v_readlane_b32 s31, v40, 1 1873; GISEL-NEXT: v_readlane_b32 s30, v40, 0 1874; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1875; GISEL-NEXT: v_readlane_b32 s33, v40, 32 1876; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1877; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1878; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1879; GISEL-NEXT: s_waitcnt vmcnt(0) 1880; GISEL-NEXT: s_setpc_b64 s[30:31] 1881 tail call amdgpu_gfx void %fptr() 1882 ret void 1883} 1884