1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s 4 5@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 6@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4 7 8define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { 9; GCN-LABEL: test_indirect_call_sgpr_ptr: 10; GCN: .amd_kernel_code_t 11; GCN-NEXT: amd_code_version_major = 1 12; GCN-NEXT: amd_code_version_minor = 2 13; GCN-NEXT: amd_machine_kind = 1 14; GCN-NEXT: amd_machine_version_major = 7 15; GCN-NEXT: amd_machine_version_minor = 0 16; GCN-NEXT: amd_machine_version_stepping = 0 17; GCN-NEXT: kernel_code_entry_byte_offset = 256 18; GCN-NEXT: kernel_code_prefetch_byte_size = 0 19; GCN-NEXT: granulated_workitem_vgpr_count = 7 20; GCN-NEXT: granulated_wavefront_sgpr_count = 4 21; GCN-NEXT: priority = 0 22; GCN-NEXT: float_mode = 240 23; GCN-NEXT: priv = 0 24; GCN-NEXT: enable_dx10_clamp = 1 25; GCN-NEXT: debug_mode = 0 26; GCN-NEXT: enable_ieee_mode = 1 27; GCN-NEXT: enable_wgp_mode = 0 28; GCN-NEXT: enable_mem_ordered = 0 29; GCN-NEXT: enable_fwd_progress = 0 30; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 31; GCN-NEXT: user_sgpr_count = 14 32; GCN-NEXT: enable_trap_handler = 0 33; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 34; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 35; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 36; GCN-NEXT: enable_sgpr_workgroup_info = 0 37; GCN-NEXT: enable_vgpr_workitem_id = 2 38; GCN-NEXT: enable_exception_msb = 0 39; GCN-NEXT: granulated_lds_size = 0 40; GCN-NEXT: enable_exception = 0 41; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 42; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 43; GCN-NEXT: enable_sgpr_queue_ptr = 
1 44; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 45; GCN-NEXT: enable_sgpr_dispatch_id = 1 46; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 47; GCN-NEXT: enable_sgpr_private_segment_size = 0 48; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 49; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 50; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 51; GCN-NEXT: enable_wavefront_size32 = 0 52; GCN-NEXT: enable_ordered_append_gds = 0 53; GCN-NEXT: private_element_size = 1 54; GCN-NEXT: is_ptr64 = 1 55; GCN-NEXT: is_dynamic_callstack = 1 56; GCN-NEXT: is_debug_enabled = 0 57; GCN-NEXT: is_xnack_enabled = 0 58; GCN-NEXT: workitem_private_segment_byte_size = 16384 59; GCN-NEXT: workgroup_group_segment_byte_size = 0 60; GCN-NEXT: gds_segment_byte_size = 0 61; GCN-NEXT: kernarg_segment_byte_size = 64 62; GCN-NEXT: workgroup_fbarrier_count = 0 63; GCN-NEXT: wavefront_sgpr_count = 37 64; GCN-NEXT: workitem_vgpr_count = 32 65; GCN-NEXT: reserved_vgpr_first = 0 66; GCN-NEXT: reserved_vgpr_count = 0 67; GCN-NEXT: reserved_sgpr_first = 0 68; GCN-NEXT: reserved_sgpr_count = 0 69; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 70; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 71; GCN-NEXT: kernarg_segment_alignment = 4 72; GCN-NEXT: group_segment_alignment = 4 73; GCN-NEXT: private_segment_alignment = 4 74; GCN-NEXT: wavefront_size = 6 75; GCN-NEXT: call_convention = -1 76; GCN-NEXT: runtime_loader_kernel_symbol = 0 77; GCN-NEXT: .end_amd_kernel_code_t 78; GCN-NEXT: ; %bb.0: 79; GCN-NEXT: s_mov_b32 s32, 0 80; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 81; GCN-NEXT: s_add_i32 s12, s12, s17 82; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 83; GCN-NEXT: s_add_u32 s0, s0, s17 84; GCN-NEXT: s_addc_u32 s1, s1, 0 85; GCN-NEXT: s_mov_b32 s13, s15 86; GCN-NEXT: s_mov_b32 s12, s14 87; GCN-NEXT: s_getpc_b64 s[14:15] 88; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 89; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 90; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 91; 
GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 92; GCN-NEXT: s_add_u32 s8, s8, 8 93; GCN-NEXT: s_addc_u32 s9, s9, 0 94; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 95; GCN-NEXT: v_or_b32_e32 v0, v0, v1 96; GCN-NEXT: v_or_b32_e32 v31, v0, v2 97; GCN-NEXT: s_mov_b32 s14, s16 98; GCN-NEXT: s_waitcnt lgkmcnt(0) 99; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 100; GCN-NEXT: s_endpgm 101; 102; GISEL-LABEL: test_indirect_call_sgpr_ptr: 103; GISEL: .amd_kernel_code_t 104; GISEL-NEXT: amd_code_version_major = 1 105; GISEL-NEXT: amd_code_version_minor = 2 106; GISEL-NEXT: amd_machine_kind = 1 107; GISEL-NEXT: amd_machine_version_major = 7 108; GISEL-NEXT: amd_machine_version_minor = 0 109; GISEL-NEXT: amd_machine_version_stepping = 0 110; GISEL-NEXT: kernel_code_entry_byte_offset = 256 111; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 112; GISEL-NEXT: granulated_workitem_vgpr_count = 7 113; GISEL-NEXT: granulated_wavefront_sgpr_count = 4 114; GISEL-NEXT: priority = 0 115; GISEL-NEXT: float_mode = 240 116; GISEL-NEXT: priv = 0 117; GISEL-NEXT: enable_dx10_clamp = 1 118; GISEL-NEXT: debug_mode = 0 119; GISEL-NEXT: enable_ieee_mode = 1 120; GISEL-NEXT: enable_wgp_mode = 0 121; GISEL-NEXT: enable_mem_ordered = 0 122; GISEL-NEXT: enable_fwd_progress = 0 123; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 124; GISEL-NEXT: user_sgpr_count = 14 125; GISEL-NEXT: enable_trap_handler = 0 126; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 127; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 128; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 129; GISEL-NEXT: enable_sgpr_workgroup_info = 0 130; GISEL-NEXT: enable_vgpr_workitem_id = 2 131; GISEL-NEXT: enable_exception_msb = 0 132; GISEL-NEXT: granulated_lds_size = 0 133; GISEL-NEXT: enable_exception = 0 134; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 135; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 136; GISEL-NEXT: enable_sgpr_queue_ptr = 1 137; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 138; GISEL-NEXT: enable_sgpr_dispatch_id = 
1 139; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 140; GISEL-NEXT: enable_sgpr_private_segment_size = 0 141; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 142; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 143; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 144; GISEL-NEXT: enable_wavefront_size32 = 0 145; GISEL-NEXT: enable_ordered_append_gds = 0 146; GISEL-NEXT: private_element_size = 1 147; GISEL-NEXT: is_ptr64 = 1 148; GISEL-NEXT: is_dynamic_callstack = 1 149; GISEL-NEXT: is_debug_enabled = 0 150; GISEL-NEXT: is_xnack_enabled = 0 151; GISEL-NEXT: workitem_private_segment_byte_size = 16384 152; GISEL-NEXT: workgroup_group_segment_byte_size = 0 153; GISEL-NEXT: gds_segment_byte_size = 0 154; GISEL-NEXT: kernarg_segment_byte_size = 64 155; GISEL-NEXT: workgroup_fbarrier_count = 0 156; GISEL-NEXT: wavefront_sgpr_count = 37 157; GISEL-NEXT: workitem_vgpr_count = 32 158; GISEL-NEXT: reserved_vgpr_first = 0 159; GISEL-NEXT: reserved_vgpr_count = 0 160; GISEL-NEXT: reserved_sgpr_first = 0 161; GISEL-NEXT: reserved_sgpr_count = 0 162; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 163; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 164; GISEL-NEXT: kernarg_segment_alignment = 4 165; GISEL-NEXT: group_segment_alignment = 4 166; GISEL-NEXT: private_segment_alignment = 4 167; GISEL-NEXT: wavefront_size = 6 168; GISEL-NEXT: call_convention = -1 169; GISEL-NEXT: runtime_loader_kernel_symbol = 0 170; GISEL-NEXT: .end_amd_kernel_code_t 171; GISEL-NEXT: ; %bb.0: 172; GISEL-NEXT: s_mov_b32 s32, 0 173; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 174; GISEL-NEXT: s_add_i32 s12, s12, s17 175; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 176; GISEL-NEXT: s_add_u32 s0, s0, s17 177; GISEL-NEXT: s_addc_u32 s1, s1, 0 178; GISEL-NEXT: s_mov_b32 s13, s15 179; GISEL-NEXT: s_mov_b32 s12, s14 180; GISEL-NEXT: s_getpc_b64 s[14:15] 181; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 182; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 183; 
GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 184; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 185; GISEL-NEXT: s_add_u32 s8, s8, 8 186; GISEL-NEXT: s_addc_u32 s9, s9, 0 187; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 188; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 189; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 190; GISEL-NEXT: s_mov_b32 s14, s16 191; GISEL-NEXT: s_waitcnt lgkmcnt(0) 192; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 193; GISEL-NEXT: s_endpgm 194 %fptr = load void()*, void()* addrspace(4)* @gv.fptr0 195 call void %fptr() 196 ret void 197} 198 199define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { 200; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: 201; GCN: .amd_kernel_code_t 202; GCN-NEXT: amd_code_version_major = 1 203; GCN-NEXT: amd_code_version_minor = 2 204; GCN-NEXT: amd_machine_kind = 1 205; GCN-NEXT: amd_machine_version_major = 7 206; GCN-NEXT: amd_machine_version_minor = 0 207; GCN-NEXT: amd_machine_version_stepping = 0 208; GCN-NEXT: kernel_code_entry_byte_offset = 256 209; GCN-NEXT: kernel_code_prefetch_byte_size = 0 210; GCN-NEXT: granulated_workitem_vgpr_count = 7 211; GCN-NEXT: granulated_wavefront_sgpr_count = 4 212; GCN-NEXT: priority = 0 213; GCN-NEXT: float_mode = 240 214; GCN-NEXT: priv = 0 215; GCN-NEXT: enable_dx10_clamp = 1 216; GCN-NEXT: debug_mode = 0 217; GCN-NEXT: enable_ieee_mode = 1 218; GCN-NEXT: enable_wgp_mode = 0 219; GCN-NEXT: enable_mem_ordered = 0 220; GCN-NEXT: enable_fwd_progress = 0 221; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 222; GCN-NEXT: user_sgpr_count = 14 223; GCN-NEXT: enable_trap_handler = 0 224; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 225; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 226; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 227; GCN-NEXT: enable_sgpr_workgroup_info = 0 228; GCN-NEXT: enable_vgpr_workitem_id = 2 229; GCN-NEXT: enable_exception_msb = 0 230; GCN-NEXT: granulated_lds_size = 0 231; GCN-NEXT: enable_exception = 0 232; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 233; 
GCN-NEXT: enable_sgpr_dispatch_ptr = 1 234; GCN-NEXT: enable_sgpr_queue_ptr = 1 235; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 236; GCN-NEXT: enable_sgpr_dispatch_id = 1 237; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 238; GCN-NEXT: enable_sgpr_private_segment_size = 0 239; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 240; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 241; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 242; GCN-NEXT: enable_wavefront_size32 = 0 243; GCN-NEXT: enable_ordered_append_gds = 0 244; GCN-NEXT: private_element_size = 1 245; GCN-NEXT: is_ptr64 = 1 246; GCN-NEXT: is_dynamic_callstack = 1 247; GCN-NEXT: is_debug_enabled = 0 248; GCN-NEXT: is_xnack_enabled = 0 249; GCN-NEXT: workitem_private_segment_byte_size = 16384 250; GCN-NEXT: workgroup_group_segment_byte_size = 0 251; GCN-NEXT: gds_segment_byte_size = 0 252; GCN-NEXT: kernarg_segment_byte_size = 64 253; GCN-NEXT: workgroup_fbarrier_count = 0 254; GCN-NEXT: wavefront_sgpr_count = 37 255; GCN-NEXT: workitem_vgpr_count = 32 256; GCN-NEXT: reserved_vgpr_first = 0 257; GCN-NEXT: reserved_vgpr_count = 0 258; GCN-NEXT: reserved_sgpr_first = 0 259; GCN-NEXT: reserved_sgpr_count = 0 260; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 261; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 262; GCN-NEXT: kernarg_segment_alignment = 4 263; GCN-NEXT: group_segment_alignment = 4 264; GCN-NEXT: private_segment_alignment = 4 265; GCN-NEXT: wavefront_size = 6 266; GCN-NEXT: call_convention = -1 267; GCN-NEXT: runtime_loader_kernel_symbol = 0 268; GCN-NEXT: .end_amd_kernel_code_t 269; GCN-NEXT: ; %bb.0: 270; GCN-NEXT: s_mov_b32 s32, 0 271; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 272; GCN-NEXT: s_add_i32 s12, s12, s17 273; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 274; GCN-NEXT: s_add_u32 s0, s0, s17 275; GCN-NEXT: s_addc_u32 s1, s1, 0 276; GCN-NEXT: s_mov_b32 s13, s15 277; GCN-NEXT: s_mov_b32 s12, s14 278; GCN-NEXT: s_getpc_b64 s[14:15] 279; GCN-NEXT: s_add_u32 s14, s14, 
gv.fptr1@rel32@lo+4 280; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 281; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 282; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 283; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 284; GCN-NEXT: s_add_u32 s8, s8, 8 285; GCN-NEXT: s_addc_u32 s9, s9, 0 286; GCN-NEXT: v_or_b32_e32 v0, v0, v1 287; GCN-NEXT: v_or_b32_e32 v31, v0, v2 288; GCN-NEXT: v_mov_b32_e32 v0, 0x7b 289; GCN-NEXT: s_mov_b32 s14, s16 290; GCN-NEXT: s_waitcnt lgkmcnt(0) 291; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] 292; GCN-NEXT: s_endpgm 293; 294; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: 295; GISEL: .amd_kernel_code_t 296; GISEL-NEXT: amd_code_version_major = 1 297; GISEL-NEXT: amd_code_version_minor = 2 298; GISEL-NEXT: amd_machine_kind = 1 299; GISEL-NEXT: amd_machine_version_major = 7 300; GISEL-NEXT: amd_machine_version_minor = 0 301; GISEL-NEXT: amd_machine_version_stepping = 0 302; GISEL-NEXT: kernel_code_entry_byte_offset = 256 303; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 304; GISEL-NEXT: granulated_workitem_vgpr_count = 7 305; GISEL-NEXT: granulated_wavefront_sgpr_count = 4 306; GISEL-NEXT: priority = 0 307; GISEL-NEXT: float_mode = 240 308; GISEL-NEXT: priv = 0 309; GISEL-NEXT: enable_dx10_clamp = 1 310; GISEL-NEXT: debug_mode = 0 311; GISEL-NEXT: enable_ieee_mode = 1 312; GISEL-NEXT: enable_wgp_mode = 0 313; GISEL-NEXT: enable_mem_ordered = 0 314; GISEL-NEXT: enable_fwd_progress = 0 315; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 316; GISEL-NEXT: user_sgpr_count = 14 317; GISEL-NEXT: enable_trap_handler = 0 318; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 319; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 320; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 321; GISEL-NEXT: enable_sgpr_workgroup_info = 0 322; GISEL-NEXT: enable_vgpr_workitem_id = 2 323; GISEL-NEXT: enable_exception_msb = 0 324; GISEL-NEXT: granulated_lds_size = 0 325; GISEL-NEXT: enable_exception = 0 326; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 327; 
GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 328; GISEL-NEXT: enable_sgpr_queue_ptr = 1 329; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 330; GISEL-NEXT: enable_sgpr_dispatch_id = 1 331; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 332; GISEL-NEXT: enable_sgpr_private_segment_size = 0 333; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 334; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 335; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 336; GISEL-NEXT: enable_wavefront_size32 = 0 337; GISEL-NEXT: enable_ordered_append_gds = 0 338; GISEL-NEXT: private_element_size = 1 339; GISEL-NEXT: is_ptr64 = 1 340; GISEL-NEXT: is_dynamic_callstack = 1 341; GISEL-NEXT: is_debug_enabled = 0 342; GISEL-NEXT: is_xnack_enabled = 0 343; GISEL-NEXT: workitem_private_segment_byte_size = 16384 344; GISEL-NEXT: workgroup_group_segment_byte_size = 0 345; GISEL-NEXT: gds_segment_byte_size = 0 346; GISEL-NEXT: kernarg_segment_byte_size = 64 347; GISEL-NEXT: workgroup_fbarrier_count = 0 348; GISEL-NEXT: wavefront_sgpr_count = 37 349; GISEL-NEXT: workitem_vgpr_count = 32 350; GISEL-NEXT: reserved_vgpr_first = 0 351; GISEL-NEXT: reserved_vgpr_count = 0 352; GISEL-NEXT: reserved_sgpr_first = 0 353; GISEL-NEXT: reserved_sgpr_count = 0 354; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 355; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 356; GISEL-NEXT: kernarg_segment_alignment = 4 357; GISEL-NEXT: group_segment_alignment = 4 358; GISEL-NEXT: private_segment_alignment = 4 359; GISEL-NEXT: wavefront_size = 6 360; GISEL-NEXT: call_convention = -1 361; GISEL-NEXT: runtime_loader_kernel_symbol = 0 362; GISEL-NEXT: .end_amd_kernel_code_t 363; GISEL-NEXT: ; %bb.0: 364; GISEL-NEXT: s_mov_b32 s32, 0 365; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 366; GISEL-NEXT: s_add_i32 s12, s12, s17 367; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 368; GISEL-NEXT: s_add_u32 s0, s0, s17 369; GISEL-NEXT: s_addc_u32 s1, s1, 0 370; GISEL-NEXT: s_mov_b32 s13, s15 371; GISEL-NEXT: 
s_mov_b32 s12, s14 372; GISEL-NEXT: s_getpc_b64 s[14:15] 373; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 374; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 375; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 376; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 377; GISEL-NEXT: s_add_u32 s8, s8, 8 378; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 379; GISEL-NEXT: s_addc_u32 s9, s9, 0 380; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 381; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 382; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 383; GISEL-NEXT: s_mov_b32 s14, s16 384; GISEL-NEXT: s_waitcnt lgkmcnt(0) 385; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] 386; GISEL-NEXT: s_endpgm 387 %fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1 388 call void %fptr(i32 123) 389 ret void 390} 391 392define void @test_indirect_call_vgpr_ptr(void()* %fptr) { 393; GCN-LABEL: test_indirect_call_vgpr_ptr: 394; GCN: ; %bb.0: 395; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 396; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 397; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 398; GCN-NEXT: s_mov_b64 exec, s[16:17] 399; GCN-NEXT: v_writelane_b32 v40, s33, 17 400; GCN-NEXT: s_mov_b32 s33, s32 401; GCN-NEXT: s_addk_i32 s32, 0x400 402; GCN-NEXT: v_writelane_b32 v40, s34, 0 403; GCN-NEXT: v_writelane_b32 v40, s35, 1 404; GCN-NEXT: v_writelane_b32 v40, s36, 2 405; GCN-NEXT: v_writelane_b32 v40, s37, 3 406; GCN-NEXT: v_writelane_b32 v40, s38, 4 407; GCN-NEXT: v_writelane_b32 v40, s39, 5 408; GCN-NEXT: v_writelane_b32 v40, s40, 6 409; GCN-NEXT: v_writelane_b32 v40, s41, 7 410; GCN-NEXT: v_writelane_b32 v40, s42, 8 411; GCN-NEXT: v_writelane_b32 v40, s43, 9 412; GCN-NEXT: v_writelane_b32 v40, s44, 10 413; GCN-NEXT: v_writelane_b32 v40, s46, 11 414; GCN-NEXT: v_writelane_b32 v40, s47, 12 415; GCN-NEXT: v_writelane_b32 v40, s48, 13 416; GCN-NEXT: v_writelane_b32 v40, s49, 14 417; GCN-NEXT: v_writelane_b32 v40, s30, 15 418; GCN-NEXT: v_writelane_b32 v40, s31, 16 419; GCN-NEXT: 
s_mov_b32 s42, s14 420; GCN-NEXT: s_mov_b32 s43, s13 421; GCN-NEXT: s_mov_b32 s44, s12 422; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 423; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 424; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 425; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 426; GCN-NEXT: s_mov_b64 s[46:47], exec 427; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 428; GCN-NEXT: v_readfirstlane_b32 s16, v0 429; GCN-NEXT: v_readfirstlane_b32 s17, v1 430; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 431; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 432; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 433; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 434; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 435; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 436; GCN-NEXT: s_mov_b32 s12, s44 437; GCN-NEXT: s_mov_b32 s13, s43 438; GCN-NEXT: s_mov_b32 s14, s42 439; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 440; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 441; GCN-NEXT: ; implicit-def: $vgpr31 442; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 443; GCN-NEXT: s_cbranch_execnz .LBB2_1 444; GCN-NEXT: ; %bb.2: 445; GCN-NEXT: s_mov_b64 exec, s[46:47] 446; GCN-NEXT: v_readlane_b32 s4, v40, 15 447; GCN-NEXT: v_readlane_b32 s5, v40, 16 448; GCN-NEXT: v_readlane_b32 s49, v40, 14 449; GCN-NEXT: v_readlane_b32 s48, v40, 13 450; GCN-NEXT: v_readlane_b32 s47, v40, 12 451; GCN-NEXT: v_readlane_b32 s46, v40, 11 452; GCN-NEXT: v_readlane_b32 s44, v40, 10 453; GCN-NEXT: v_readlane_b32 s43, v40, 9 454; GCN-NEXT: v_readlane_b32 s42, v40, 8 455; GCN-NEXT: v_readlane_b32 s41, v40, 7 456; GCN-NEXT: v_readlane_b32 s40, v40, 6 457; GCN-NEXT: v_readlane_b32 s39, v40, 5 458; GCN-NEXT: v_readlane_b32 s38, v40, 4 459; GCN-NEXT: v_readlane_b32 s37, v40, 3 460; GCN-NEXT: v_readlane_b32 s36, v40, 2 461; GCN-NEXT: v_readlane_b32 s35, v40, 1 462; GCN-NEXT: v_readlane_b32 s34, v40, 0 463; GCN-NEXT: s_addk_i32 s32, 0xfc00 464; GCN-NEXT: v_readlane_b32 s33, v40, 17 465; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 466; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 
4-byte Folded Reload 467; GCN-NEXT: s_mov_b64 exec, s[6:7] 468; GCN-NEXT: s_waitcnt vmcnt(0) 469; GCN-NEXT: s_setpc_b64 s[4:5] 470; 471; GISEL-LABEL: test_indirect_call_vgpr_ptr: 472; GISEL: ; %bb.0: 473; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 474; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 475; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 476; GISEL-NEXT: s_mov_b64 exec, s[16:17] 477; GISEL-NEXT: v_writelane_b32 v40, s33, 17 478; GISEL-NEXT: s_mov_b32 s33, s32 479; GISEL-NEXT: s_addk_i32 s32, 0x400 480; GISEL-NEXT: v_writelane_b32 v40, s34, 0 481; GISEL-NEXT: v_writelane_b32 v40, s35, 1 482; GISEL-NEXT: v_writelane_b32 v40, s36, 2 483; GISEL-NEXT: v_writelane_b32 v40, s37, 3 484; GISEL-NEXT: v_writelane_b32 v40, s38, 4 485; GISEL-NEXT: v_writelane_b32 v40, s39, 5 486; GISEL-NEXT: v_writelane_b32 v40, s40, 6 487; GISEL-NEXT: v_writelane_b32 v40, s41, 7 488; GISEL-NEXT: v_writelane_b32 v40, s42, 8 489; GISEL-NEXT: v_writelane_b32 v40, s43, 9 490; GISEL-NEXT: v_writelane_b32 v40, s44, 10 491; GISEL-NEXT: v_writelane_b32 v40, s46, 11 492; GISEL-NEXT: v_writelane_b32 v40, s47, 12 493; GISEL-NEXT: v_writelane_b32 v40, s48, 13 494; GISEL-NEXT: v_writelane_b32 v40, s49, 14 495; GISEL-NEXT: s_mov_b32 s42, s14 496; GISEL-NEXT: s_mov_b32 s43, s13 497; GISEL-NEXT: s_mov_b32 s44, s12 498; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 499; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 500; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 501; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 502; GISEL-NEXT: v_writelane_b32 v40, s30, 15 503; GISEL-NEXT: v_writelane_b32 v40, s31, 16 504; GISEL-NEXT: s_mov_b64 s[46:47], exec 505; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 506; GISEL-NEXT: v_readfirstlane_b32 s16, v0 507; GISEL-NEXT: v_readfirstlane_b32 s17, v1 508; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 509; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 510; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 511; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 512; 
GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 513; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 514; GISEL-NEXT: s_mov_b32 s12, s44 515; GISEL-NEXT: s_mov_b32 s13, s43 516; GISEL-NEXT: s_mov_b32 s14, s42 517; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 518; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 519; GISEL-NEXT: ; implicit-def: $vgpr31 520; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 521; GISEL-NEXT: s_cbranch_execnz .LBB2_1 522; GISEL-NEXT: ; %bb.2: 523; GISEL-NEXT: s_mov_b64 exec, s[46:47] 524; GISEL-NEXT: v_readlane_b32 s4, v40, 15 525; GISEL-NEXT: v_readlane_b32 s5, v40, 16 526; GISEL-NEXT: v_readlane_b32 s49, v40, 14 527; GISEL-NEXT: v_readlane_b32 s48, v40, 13 528; GISEL-NEXT: v_readlane_b32 s47, v40, 12 529; GISEL-NEXT: v_readlane_b32 s46, v40, 11 530; GISEL-NEXT: v_readlane_b32 s44, v40, 10 531; GISEL-NEXT: v_readlane_b32 s43, v40, 9 532; GISEL-NEXT: v_readlane_b32 s42, v40, 8 533; GISEL-NEXT: v_readlane_b32 s41, v40, 7 534; GISEL-NEXT: v_readlane_b32 s40, v40, 6 535; GISEL-NEXT: v_readlane_b32 s39, v40, 5 536; GISEL-NEXT: v_readlane_b32 s38, v40, 4 537; GISEL-NEXT: v_readlane_b32 s37, v40, 3 538; GISEL-NEXT: v_readlane_b32 s36, v40, 2 539; GISEL-NEXT: v_readlane_b32 s35, v40, 1 540; GISEL-NEXT: v_readlane_b32 s34, v40, 0 541; GISEL-NEXT: s_addk_i32 s32, 0xfc00 542; GISEL-NEXT: v_readlane_b32 s33, v40, 17 543; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 544; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 545; GISEL-NEXT: s_mov_b64 exec, s[6:7] 546; GISEL-NEXT: s_waitcnt vmcnt(0) 547; GISEL-NEXT: s_setpc_b64 s[4:5] 548 call void %fptr() 549 ret void 550} 551 552define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { 553; GCN-LABEL: test_indirect_call_vgpr_ptr_arg: 554; GCN: ; %bb.0: 555; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 556; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 557; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 558; GCN-NEXT: s_mov_b64 exec, s[16:17] 559; GCN-NEXT: v_writelane_b32 v40, 
s33, 17 560; GCN-NEXT: s_mov_b32 s33, s32 561; GCN-NEXT: s_addk_i32 s32, 0x400 562; GCN-NEXT: v_writelane_b32 v40, s34, 0 563; GCN-NEXT: v_writelane_b32 v40, s35, 1 564; GCN-NEXT: v_writelane_b32 v40, s36, 2 565; GCN-NEXT: v_writelane_b32 v40, s37, 3 566; GCN-NEXT: v_writelane_b32 v40, s38, 4 567; GCN-NEXT: v_writelane_b32 v40, s39, 5 568; GCN-NEXT: v_writelane_b32 v40, s40, 6 569; GCN-NEXT: v_writelane_b32 v40, s41, 7 570; GCN-NEXT: v_writelane_b32 v40, s42, 8 571; GCN-NEXT: v_writelane_b32 v40, s43, 9 572; GCN-NEXT: v_writelane_b32 v40, s44, 10 573; GCN-NEXT: v_writelane_b32 v40, s46, 11 574; GCN-NEXT: v_writelane_b32 v40, s47, 12 575; GCN-NEXT: v_writelane_b32 v40, s48, 13 576; GCN-NEXT: v_writelane_b32 v40, s49, 14 577; GCN-NEXT: v_writelane_b32 v40, s30, 15 578; GCN-NEXT: v_writelane_b32 v40, s31, 16 579; GCN-NEXT: s_mov_b32 s42, s14 580; GCN-NEXT: s_mov_b32 s43, s13 581; GCN-NEXT: s_mov_b32 s44, s12 582; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 583; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 584; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 585; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 586; GCN-NEXT: s_mov_b64 s[46:47], exec 587; GCN-NEXT: v_mov_b32_e32 v2, 0x7b 588; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 589; GCN-NEXT: v_readfirstlane_b32 s16, v0 590; GCN-NEXT: v_readfirstlane_b32 s17, v1 591; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 592; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 593; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 594; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 595; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 596; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 597; GCN-NEXT: s_mov_b32 s12, s44 598; GCN-NEXT: s_mov_b32 s13, s43 599; GCN-NEXT: s_mov_b32 s14, s42 600; GCN-NEXT: v_mov_b32_e32 v0, v2 601; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 602; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 603; GCN-NEXT: ; implicit-def: $vgpr31 604; GCN-NEXT: ; implicit-def: $vgpr2 605; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 606; GCN-NEXT: s_cbranch_execnz .LBB3_1 607; GCN-NEXT: ; 
%bb.2: 608; GCN-NEXT: s_mov_b64 exec, s[46:47] 609; GCN-NEXT: v_readlane_b32 s4, v40, 15 610; GCN-NEXT: v_readlane_b32 s5, v40, 16 611; GCN-NEXT: v_readlane_b32 s49, v40, 14 612; GCN-NEXT: v_readlane_b32 s48, v40, 13 613; GCN-NEXT: v_readlane_b32 s47, v40, 12 614; GCN-NEXT: v_readlane_b32 s46, v40, 11 615; GCN-NEXT: v_readlane_b32 s44, v40, 10 616; GCN-NEXT: v_readlane_b32 s43, v40, 9 617; GCN-NEXT: v_readlane_b32 s42, v40, 8 618; GCN-NEXT: v_readlane_b32 s41, v40, 7 619; GCN-NEXT: v_readlane_b32 s40, v40, 6 620; GCN-NEXT: v_readlane_b32 s39, v40, 5 621; GCN-NEXT: v_readlane_b32 s38, v40, 4 622; GCN-NEXT: v_readlane_b32 s37, v40, 3 623; GCN-NEXT: v_readlane_b32 s36, v40, 2 624; GCN-NEXT: v_readlane_b32 s35, v40, 1 625; GCN-NEXT: v_readlane_b32 s34, v40, 0 626; GCN-NEXT: s_addk_i32 s32, 0xfc00 627; GCN-NEXT: v_readlane_b32 s33, v40, 17 628; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 629; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 630; GCN-NEXT: s_mov_b64 exec, s[6:7] 631; GCN-NEXT: s_waitcnt vmcnt(0) 632; GCN-NEXT: s_setpc_b64 s[4:5] 633; 634; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: 635; GISEL: ; %bb.0: 636; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 637; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 638; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 639; GISEL-NEXT: s_mov_b64 exec, s[16:17] 640; GISEL-NEXT: v_writelane_b32 v40, s33, 17 641; GISEL-NEXT: s_mov_b32 s33, s32 642; GISEL-NEXT: s_addk_i32 s32, 0x400 643; GISEL-NEXT: v_writelane_b32 v40, s34, 0 644; GISEL-NEXT: v_writelane_b32 v40, s35, 1 645; GISEL-NEXT: v_writelane_b32 v40, s36, 2 646; GISEL-NEXT: v_writelane_b32 v40, s37, 3 647; GISEL-NEXT: v_writelane_b32 v40, s38, 4 648; GISEL-NEXT: v_writelane_b32 v40, s39, 5 649; GISEL-NEXT: v_writelane_b32 v40, s40, 6 650; GISEL-NEXT: v_writelane_b32 v40, s41, 7 651; GISEL-NEXT: v_writelane_b32 v40, s42, 8 652; GISEL-NEXT: v_writelane_b32 v40, s43, 9 653; GISEL-NEXT: v_writelane_b32 v40, s44, 10 
654; GISEL-NEXT: v_writelane_b32 v40, s46, 11 655; GISEL-NEXT: v_writelane_b32 v40, s47, 12 656; GISEL-NEXT: v_writelane_b32 v40, s48, 13 657; GISEL-NEXT: v_writelane_b32 v40, s49, 14 658; GISEL-NEXT: s_mov_b32 s42, s14 659; GISEL-NEXT: s_mov_b32 s43, s13 660; GISEL-NEXT: s_mov_b32 s44, s12 661; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 662; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 663; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 664; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 665; GISEL-NEXT: v_writelane_b32 v40, s30, 15 666; GISEL-NEXT: v_writelane_b32 v40, s31, 16 667; GISEL-NEXT: s_mov_b64 s[46:47], exec 668; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 669; GISEL-NEXT: v_readfirstlane_b32 s16, v0 670; GISEL-NEXT: v_readfirstlane_b32 s17, v1 671; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 672; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 673; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 674; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 675; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 676; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 677; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 678; GISEL-NEXT: s_mov_b32 s12, s44 679; GISEL-NEXT: s_mov_b32 s13, s43 680; GISEL-NEXT: s_mov_b32 s14, s42 681; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 682; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 683; GISEL-NEXT: ; implicit-def: $vgpr31 684; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 685; GISEL-NEXT: s_cbranch_execnz .LBB3_1 686; GISEL-NEXT: ; %bb.2: 687; GISEL-NEXT: s_mov_b64 exec, s[46:47] 688; GISEL-NEXT: v_readlane_b32 s4, v40, 15 689; GISEL-NEXT: v_readlane_b32 s5, v40, 16 690; GISEL-NEXT: v_readlane_b32 s49, v40, 14 691; GISEL-NEXT: v_readlane_b32 s48, v40, 13 692; GISEL-NEXT: v_readlane_b32 s47, v40, 12 693; GISEL-NEXT: v_readlane_b32 s46, v40, 11 694; GISEL-NEXT: v_readlane_b32 s44, v40, 10 695; GISEL-NEXT: v_readlane_b32 s43, v40, 9 696; GISEL-NEXT: v_readlane_b32 s42, v40, 8 697; GISEL-NEXT: v_readlane_b32 s41, v40, 7 698; GISEL-NEXT: v_readlane_b32 s40, v40, 6 699; GISEL-NEXT: 
v_readlane_b32 s39, v40, 5 700; GISEL-NEXT: v_readlane_b32 s38, v40, 4 701; GISEL-NEXT: v_readlane_b32 s37, v40, 3 702; GISEL-NEXT: v_readlane_b32 s36, v40, 2 703; GISEL-NEXT: v_readlane_b32 s35, v40, 1 704; GISEL-NEXT: v_readlane_b32 s34, v40, 0 705; GISEL-NEXT: s_addk_i32 s32, 0xfc00 706; GISEL-NEXT: v_readlane_b32 s33, v40, 17 707; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 708; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 709; GISEL-NEXT: s_mov_b64 exec, s[6:7] 710; GISEL-NEXT: s_waitcnt vmcnt(0) 711; GISEL-NEXT: s_setpc_b64 s[4:5] 712 call void %fptr(i32 123) 713 ret void 714} 715 716define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { 717; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: 718; GCN: ; %bb.0: 719; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 720; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 721; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 722; GCN-NEXT: s_mov_b64 exec, s[16:17] 723; GCN-NEXT: v_writelane_b32 v40, s33, 17 724; GCN-NEXT: s_mov_b32 s33, s32 725; GCN-NEXT: s_addk_i32 s32, 0x400 726; GCN-NEXT: v_writelane_b32 v40, s34, 0 727; GCN-NEXT: v_writelane_b32 v40, s35, 1 728; GCN-NEXT: v_writelane_b32 v40, s36, 2 729; GCN-NEXT: v_writelane_b32 v40, s37, 3 730; GCN-NEXT: v_writelane_b32 v40, s38, 4 731; GCN-NEXT: v_writelane_b32 v40, s39, 5 732; GCN-NEXT: v_writelane_b32 v40, s40, 6 733; GCN-NEXT: v_writelane_b32 v40, s41, 7 734; GCN-NEXT: v_writelane_b32 v40, s42, 8 735; GCN-NEXT: v_writelane_b32 v40, s43, 9 736; GCN-NEXT: v_writelane_b32 v40, s44, 10 737; GCN-NEXT: v_writelane_b32 v40, s46, 11 738; GCN-NEXT: v_writelane_b32 v40, s47, 12 739; GCN-NEXT: v_writelane_b32 v40, s48, 13 740; GCN-NEXT: v_writelane_b32 v40, s49, 14 741; GCN-NEXT: v_writelane_b32 v40, s30, 15 742; GCN-NEXT: v_writelane_b32 v40, s31, 16 743; GCN-NEXT: s_mov_b32 s42, s14 744; GCN-NEXT: s_mov_b32 s43, s13 745; GCN-NEXT: s_mov_b32 s44, s12 746; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 747; GCN-NEXT: s_mov_b64 
; NOTE(review): the assertions in this file are autogenerated by
; utils/update_llc_test_checks.py (see the file header); regenerate them
; rather than editing them by hand.
; @test_indirect_call_vgpr_ptr_ret: the IR body (after the checks) calls the
; i32()* argument %fptr, adds 1 to the result and returns the sum.
; Both check prefixes expect the call inside the loop .LBB4_1: the target is
; read from v[0:1] with v_readfirstlane_b32, compared against v[0:1], called
; with s_swappc_b64, and the loop repeats via s_cbranch_execnz. The call
; result is copied from v0 to v2 inside the loop; the add (v_add_i32_e32 into
; v0) comes after the loop. The two prefixes differ in where the
; v_writelane_b32 of s30/s31 (lanes 15 and 16) is placed relative to the
; s_mov copies of s4-s14.
s[36:37], s[8:9] 748; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 749; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 750; GCN-NEXT: s_mov_b64 s[46:47], exec 751; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 752; GCN-NEXT: v_readfirstlane_b32 s16, v0 753; GCN-NEXT: v_readfirstlane_b32 s17, v1 754; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 755; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc 756; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 757; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 758; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 759; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 760; GCN-NEXT: s_mov_b32 s12, s44 761; GCN-NEXT: s_mov_b32 s13, s43 762; GCN-NEXT: s_mov_b32 s14, s42 763; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 764; GCN-NEXT: v_mov_b32_e32 v2, v0 765; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 766; GCN-NEXT: ; implicit-def: $vgpr31 767; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] 768; GCN-NEXT: s_cbranch_execnz .LBB4_1 769; GCN-NEXT: ; %bb.2: 770; GCN-NEXT: s_mov_b64 exec, s[46:47] 771; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 772; GCN-NEXT: v_readlane_b32 s4, v40, 15 773; GCN-NEXT: v_readlane_b32 s5, v40, 16 774; GCN-NEXT: v_readlane_b32 s49, v40, 14 775; GCN-NEXT: v_readlane_b32 s48, v40, 13 776; GCN-NEXT: v_readlane_b32 s47, v40, 12 777; GCN-NEXT: v_readlane_b32 s46, v40, 11 778; GCN-NEXT: v_readlane_b32 s44, v40, 10 779; GCN-NEXT: v_readlane_b32 s43, v40, 9 780; GCN-NEXT: v_readlane_b32 s42, v40, 8 781; GCN-NEXT: v_readlane_b32 s41, v40, 7 782; GCN-NEXT: v_readlane_b32 s40, v40, 6 783; GCN-NEXT: v_readlane_b32 s39, v40, 5 784; GCN-NEXT: v_readlane_b32 s38, v40, 4 785; GCN-NEXT: v_readlane_b32 s37, v40, 3 786; GCN-NEXT: v_readlane_b32 s36, v40, 2 787; GCN-NEXT: v_readlane_b32 s35, v40, 1 788; GCN-NEXT: v_readlane_b32 s34, v40, 0 789; GCN-NEXT: s_addk_i32 s32, 0xfc00 790; GCN-NEXT: v_readlane_b32 s33, v40, 17 791; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 792; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 793; GCN-NEXT: s_mov_b64 exec, s[6:7] 794; GCN-NEXT: 
s_waitcnt vmcnt(0) 795; GCN-NEXT: s_setpc_b64 s[4:5] 796; 797; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: 798; GISEL: ; %bb.0: 799; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 800; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 801; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 802; GISEL-NEXT: s_mov_b64 exec, s[16:17] 803; GISEL-NEXT: v_writelane_b32 v40, s33, 17 804; GISEL-NEXT: s_mov_b32 s33, s32 805; GISEL-NEXT: s_addk_i32 s32, 0x400 806; GISEL-NEXT: v_writelane_b32 v40, s34, 0 807; GISEL-NEXT: v_writelane_b32 v40, s35, 1 808; GISEL-NEXT: v_writelane_b32 v40, s36, 2 809; GISEL-NEXT: v_writelane_b32 v40, s37, 3 810; GISEL-NEXT: v_writelane_b32 v40, s38, 4 811; GISEL-NEXT: v_writelane_b32 v40, s39, 5 812; GISEL-NEXT: v_writelane_b32 v40, s40, 6 813; GISEL-NEXT: v_writelane_b32 v40, s41, 7 814; GISEL-NEXT: v_writelane_b32 v40, s42, 8 815; GISEL-NEXT: v_writelane_b32 v40, s43, 9 816; GISEL-NEXT: v_writelane_b32 v40, s44, 10 817; GISEL-NEXT: v_writelane_b32 v40, s46, 11 818; GISEL-NEXT: v_writelane_b32 v40, s47, 12 819; GISEL-NEXT: v_writelane_b32 v40, s48, 13 820; GISEL-NEXT: v_writelane_b32 v40, s49, 14 821; GISEL-NEXT: s_mov_b32 s42, s14 822; GISEL-NEXT: s_mov_b32 s43, s13 823; GISEL-NEXT: s_mov_b32 s44, s12 824; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 825; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 826; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 827; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 828; GISEL-NEXT: v_writelane_b32 v40, s30, 15 829; GISEL-NEXT: v_writelane_b32 v40, s31, 16 830; GISEL-NEXT: s_mov_b64 s[46:47], exec 831; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 832; GISEL-NEXT: v_readfirstlane_b32 s16, v0 833; GISEL-NEXT: v_readfirstlane_b32 s17, v1 834; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 835; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc 836; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 837; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 838; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 839; GISEL-NEXT: s_mov_b64 
s[10:11], s[34:35] 840; GISEL-NEXT: s_mov_b32 s12, s44 841; GISEL-NEXT: s_mov_b32 s13, s43 842; GISEL-NEXT: s_mov_b32 s14, s42 843; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 844; GISEL-NEXT: v_mov_b32_e32 v2, v0 845; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 846; GISEL-NEXT: ; implicit-def: $vgpr31 847; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] 848; GISEL-NEXT: s_cbranch_execnz .LBB4_1 849; GISEL-NEXT: ; %bb.2: 850; GISEL-NEXT: s_mov_b64 exec, s[46:47] 851; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2 852; GISEL-NEXT: v_readlane_b32 s4, v40, 15 853; GISEL-NEXT: v_readlane_b32 s5, v40, 16 854; GISEL-NEXT: v_readlane_b32 s49, v40, 14 855; GISEL-NEXT: v_readlane_b32 s48, v40, 13 856; GISEL-NEXT: v_readlane_b32 s47, v40, 12 857; GISEL-NEXT: v_readlane_b32 s46, v40, 11 858; GISEL-NEXT: v_readlane_b32 s44, v40, 10 859; GISEL-NEXT: v_readlane_b32 s43, v40, 9 860; GISEL-NEXT: v_readlane_b32 s42, v40, 8 861; GISEL-NEXT: v_readlane_b32 s41, v40, 7 862; GISEL-NEXT: v_readlane_b32 s40, v40, 6 863; GISEL-NEXT: v_readlane_b32 s39, v40, 5 864; GISEL-NEXT: v_readlane_b32 s38, v40, 4 865; GISEL-NEXT: v_readlane_b32 s37, v40, 3 866; GISEL-NEXT: v_readlane_b32 s36, v40, 2 867; GISEL-NEXT: v_readlane_b32 s35, v40, 1 868; GISEL-NEXT: v_readlane_b32 s34, v40, 0 869; GISEL-NEXT: s_addk_i32 s32, 0xfc00 870; GISEL-NEXT: v_readlane_b32 s33, v40, 17 871; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 872; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 873; GISEL-NEXT: s_mov_b64 exec, s[6:7] 874; GISEL-NEXT: s_waitcnt vmcnt(0) 875; GISEL-NEXT: s_setpc_b64 s[4:5] 876 %a = call i32 %fptr() 877 %b = add i32 %a, 1 878 ret i32 %b 879} 880 881define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { 882; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: 883; GCN: ; %bb.0: ; %bb0 884; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 885; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 886; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 887; 
; @test_indirect_call_vgpr_ptr_in_branch (its define is at the end of the
; preceding line): bb0 branches on %cond; bb1 calls the void()* argument
; %fptr and then branches to bb2, which returns.
; Expected code: the condition is taken from bit 0 of v2 (v_and_b32 with 1)
; and tested with v_cmp_eq_u32 against 1 under the GCN prefix, v_cmp_ne_u32
; against 0 under the GISEL prefix; s_cbranch_execz .LBB5_4 skips %bb1.
; Only %bb1 saves s30/s31 (lanes 17 and 18 of v40) and runs the call loop
; .LBB5_2; they are read back before .LBB5_4 and the function returns with
; s_setpc_b64 s[30:31].
GCN-NEXT: s_mov_b64 exec, s[16:17] 888; GCN-NEXT: v_writelane_b32 v40, s33, 19 889; GCN-NEXT: s_mov_b32 s33, s32 890; GCN-NEXT: s_addk_i32 s32, 0x400 891; GCN-NEXT: v_writelane_b32 v40, s34, 0 892; GCN-NEXT: v_writelane_b32 v40, s35, 1 893; GCN-NEXT: v_writelane_b32 v40, s36, 2 894; GCN-NEXT: v_writelane_b32 v40, s37, 3 895; GCN-NEXT: v_writelane_b32 v40, s38, 4 896; GCN-NEXT: v_writelane_b32 v40, s39, 5 897; GCN-NEXT: v_writelane_b32 v40, s40, 6 898; GCN-NEXT: v_writelane_b32 v40, s41, 7 899; GCN-NEXT: v_writelane_b32 v40, s42, 8 900; GCN-NEXT: v_writelane_b32 v40, s43, 9 901; GCN-NEXT: v_writelane_b32 v40, s44, 10 902; GCN-NEXT: v_writelane_b32 v40, s46, 11 903; GCN-NEXT: v_writelane_b32 v40, s47, 12 904; GCN-NEXT: v_writelane_b32 v40, s48, 13 905; GCN-NEXT: v_writelane_b32 v40, s49, 14 906; GCN-NEXT: v_writelane_b32 v40, s50, 15 907; GCN-NEXT: v_writelane_b32 v40, s51, 16 908; GCN-NEXT: s_mov_b32 s42, s14 909; GCN-NEXT: s_mov_b32 s43, s13 910; GCN-NEXT: s_mov_b32 s44, s12 911; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] 912; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] 913; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] 914; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] 915; GCN-NEXT: v_and_b32_e32 v2, 1, v2 916; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 917; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc 918; GCN-NEXT: s_cbranch_execz .LBB5_4 919; GCN-NEXT: ; %bb.1: ; %bb1 920; GCN-NEXT: v_writelane_b32 v40, s30, 17 921; GCN-NEXT: v_writelane_b32 v40, s31, 18 922; GCN-NEXT: s_mov_b64 s[48:49], exec 923; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 924; GCN-NEXT: v_readfirstlane_b32 s16, v0 925; GCN-NEXT: v_readfirstlane_b32 s17, v1 926; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 927; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc 928; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] 929; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] 930; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] 931; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] 932; GCN-NEXT: s_mov_b32 s12, s44 933; GCN-NEXT: s_mov_b32 s13, s43 934; GCN-NEXT: s_mov_b32 
s14, s42 935; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] 936; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 937; GCN-NEXT: ; implicit-def: $vgpr31 938; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] 939; GCN-NEXT: s_cbranch_execnz .LBB5_2 940; GCN-NEXT: ; %bb.3: 941; GCN-NEXT: s_mov_b64 exec, s[48:49] 942; GCN-NEXT: v_readlane_b32 s30, v40, 17 943; GCN-NEXT: v_readlane_b32 s31, v40, 18 944; GCN-NEXT: .LBB5_4: ; %bb2 945; GCN-NEXT: s_or_b64 exec, exec, s[46:47] 946; GCN-NEXT: v_readlane_b32 s51, v40, 16 947; GCN-NEXT: v_readlane_b32 s50, v40, 15 948; GCN-NEXT: v_readlane_b32 s49, v40, 14 949; GCN-NEXT: v_readlane_b32 s48, v40, 13 950; GCN-NEXT: v_readlane_b32 s47, v40, 12 951; GCN-NEXT: v_readlane_b32 s46, v40, 11 952; GCN-NEXT: v_readlane_b32 s44, v40, 10 953; GCN-NEXT: v_readlane_b32 s43, v40, 9 954; GCN-NEXT: v_readlane_b32 s42, v40, 8 955; GCN-NEXT: v_readlane_b32 s41, v40, 7 956; GCN-NEXT: v_readlane_b32 s40, v40, 6 957; GCN-NEXT: v_readlane_b32 s39, v40, 5 958; GCN-NEXT: v_readlane_b32 s38, v40, 4 959; GCN-NEXT: v_readlane_b32 s37, v40, 3 960; GCN-NEXT: v_readlane_b32 s36, v40, 2 961; GCN-NEXT: v_readlane_b32 s35, v40, 1 962; GCN-NEXT: v_readlane_b32 s34, v40, 0 963; GCN-NEXT: s_addk_i32 s32, 0xfc00 964; GCN-NEXT: v_readlane_b32 s33, v40, 19 965; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 966; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 967; GCN-NEXT: s_mov_b64 exec, s[4:5] 968; GCN-NEXT: s_waitcnt vmcnt(0) 969; GCN-NEXT: s_setpc_b64 s[30:31] 970; 971; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: 972; GISEL: ; %bb.0: ; %bb0 973; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 974; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 975; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 976; GISEL-NEXT: s_mov_b64 exec, s[16:17] 977; GISEL-NEXT: v_writelane_b32 v40, s33, 19 978; GISEL-NEXT: s_mov_b32 s33, s32 979; GISEL-NEXT: s_addk_i32 s32, 0x400 980; GISEL-NEXT: v_writelane_b32 v40, s34, 0 981; GISEL-NEXT: v_writelane_b32 
v40, s35, 1 982; GISEL-NEXT: v_writelane_b32 v40, s36, 2 983; GISEL-NEXT: v_writelane_b32 v40, s37, 3 984; GISEL-NEXT: v_writelane_b32 v40, s38, 4 985; GISEL-NEXT: v_writelane_b32 v40, s39, 5 986; GISEL-NEXT: v_writelane_b32 v40, s40, 6 987; GISEL-NEXT: v_writelane_b32 v40, s41, 7 988; GISEL-NEXT: v_writelane_b32 v40, s42, 8 989; GISEL-NEXT: v_writelane_b32 v40, s43, 9 990; GISEL-NEXT: v_writelane_b32 v40, s44, 10 991; GISEL-NEXT: v_writelane_b32 v40, s46, 11 992; GISEL-NEXT: v_writelane_b32 v40, s47, 12 993; GISEL-NEXT: v_writelane_b32 v40, s48, 13 994; GISEL-NEXT: v_writelane_b32 v40, s49, 14 995; GISEL-NEXT: v_writelane_b32 v40, s50, 15 996; GISEL-NEXT: v_writelane_b32 v40, s51, 16 997; GISEL-NEXT: s_mov_b32 s42, s14 998; GISEL-NEXT: s_mov_b32 s43, s13 999; GISEL-NEXT: s_mov_b32 s44, s12 1000; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] 1001; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] 1002; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] 1003; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] 1004; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 1005; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1006; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc 1007; GISEL-NEXT: s_cbranch_execz .LBB5_4 1008; GISEL-NEXT: ; %bb.1: ; %bb1 1009; GISEL-NEXT: v_writelane_b32 v40, s30, 17 1010; GISEL-NEXT: v_writelane_b32 v40, s31, 18 1011; GISEL-NEXT: s_mov_b64 s[48:49], exec 1012; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 1013; GISEL-NEXT: v_readfirstlane_b32 s16, v0 1014; GISEL-NEXT: v_readfirstlane_b32 s17, v1 1015; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] 1016; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc 1017; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] 1018; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] 1019; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] 1020; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] 1021; GISEL-NEXT: s_mov_b32 s12, s44 1022; GISEL-NEXT: s_mov_b32 s13, s43 1023; GISEL-NEXT: s_mov_b32 s14, s42 1024; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] 1025; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 1026; 
GISEL-NEXT: ; implicit-def: $vgpr31 1027; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] 1028; GISEL-NEXT: s_cbranch_execnz .LBB5_2 1029; GISEL-NEXT: ; %bb.3: 1030; GISEL-NEXT: s_mov_b64 exec, s[48:49] 1031; GISEL-NEXT: v_readlane_b32 s30, v40, 17 1032; GISEL-NEXT: v_readlane_b32 s31, v40, 18 1033; GISEL-NEXT: .LBB5_4: ; %bb2 1034; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] 1035; GISEL-NEXT: v_readlane_b32 s51, v40, 16 1036; GISEL-NEXT: v_readlane_b32 s50, v40, 15 1037; GISEL-NEXT: v_readlane_b32 s49, v40, 14 1038; GISEL-NEXT: v_readlane_b32 s48, v40, 13 1039; GISEL-NEXT: v_readlane_b32 s47, v40, 12 1040; GISEL-NEXT: v_readlane_b32 s46, v40, 11 1041; GISEL-NEXT: v_readlane_b32 s44, v40, 10 1042; GISEL-NEXT: v_readlane_b32 s43, v40, 9 1043; GISEL-NEXT: v_readlane_b32 s42, v40, 8 1044; GISEL-NEXT: v_readlane_b32 s41, v40, 7 1045; GISEL-NEXT: v_readlane_b32 s40, v40, 6 1046; GISEL-NEXT: v_readlane_b32 s39, v40, 5 1047; GISEL-NEXT: v_readlane_b32 s38, v40, 4 1048; GISEL-NEXT: v_readlane_b32 s37, v40, 3 1049; GISEL-NEXT: v_readlane_b32 s36, v40, 2 1050; GISEL-NEXT: v_readlane_b32 s35, v40, 1 1051; GISEL-NEXT: v_readlane_b32 s34, v40, 0 1052; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1053; GISEL-NEXT: v_readlane_b32 s33, v40, 19 1054; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1055; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1056; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1057; GISEL-NEXT: s_waitcnt vmcnt(0) 1058; GISEL-NEXT: s_setpc_b64 s[30:31] 1059bb0: 1060 br i1 %cond, label %bb1, label %bb2 1061 1062bb1: 1063 call void %fptr() 1064 br label %bb2 1065 1066bb2: 1067 ret void 1068} 1069 1070define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { 1071; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: 1072; GCN: ; %bb.0: 1073; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1074; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1075; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1076; GCN-NEXT: s_mov_b64 exec, s[4:5] 1077; 
; @test_indirect_call_vgpr_ptr_inreg_arg (its define is on the preceding
; line): makes an amdgpu_gfx call through the void(i32)* argument %fptr,
; passing `i32 inreg 123`.
; Expected code: s34-s63 are saved to lanes 0-29 of v40 and s33 to lane 30;
; s[30:31] is copied to s[6:7], which feeds the final s_setpc_b64.
; s_movk_i32 s4, 0x7b (0x7b == 123, presumably the inreg argument; confirm
; against the amdgpu_gfx calling convention) is emitted once, before the
; loop .LBB6_1. The prefixes differ in SGPR choice inside the loop: the GCN
; checks use s[12:13] for the call target and s[10:11] as the
; s_and_saveexec_b64 destination, the GISEL checks use them the other way
; round.
GCN-NEXT: v_writelane_b32 v40, s33, 30 1078; GCN-NEXT: s_mov_b32 s33, s32 1079; GCN-NEXT: s_addk_i32 s32, 0x400 1080; GCN-NEXT: v_writelane_b32 v40, s34, 0 1081; GCN-NEXT: v_writelane_b32 v40, s35, 1 1082; GCN-NEXT: v_writelane_b32 v40, s36, 2 1083; GCN-NEXT: v_writelane_b32 v40, s37, 3 1084; GCN-NEXT: v_writelane_b32 v40, s38, 4 1085; GCN-NEXT: v_writelane_b32 v40, s39, 5 1086; GCN-NEXT: v_writelane_b32 v40, s40, 6 1087; GCN-NEXT: v_writelane_b32 v40, s41, 7 1088; GCN-NEXT: v_writelane_b32 v40, s42, 8 1089; GCN-NEXT: v_writelane_b32 v40, s43, 9 1090; GCN-NEXT: v_writelane_b32 v40, s44, 10 1091; GCN-NEXT: v_writelane_b32 v40, s45, 11 1092; GCN-NEXT: v_writelane_b32 v40, s46, 12 1093; GCN-NEXT: v_writelane_b32 v40, s47, 13 1094; GCN-NEXT: v_writelane_b32 v40, s48, 14 1095; GCN-NEXT: v_writelane_b32 v40, s49, 15 1096; GCN-NEXT: v_writelane_b32 v40, s50, 16 1097; GCN-NEXT: v_writelane_b32 v40, s51, 17 1098; GCN-NEXT: v_writelane_b32 v40, s52, 18 1099; GCN-NEXT: v_writelane_b32 v40, s53, 19 1100; GCN-NEXT: v_writelane_b32 v40, s54, 20 1101; GCN-NEXT: v_writelane_b32 v40, s55, 21 1102; GCN-NEXT: v_writelane_b32 v40, s56, 22 1103; GCN-NEXT: v_writelane_b32 v40, s57, 23 1104; GCN-NEXT: v_writelane_b32 v40, s58, 24 1105; GCN-NEXT: v_writelane_b32 v40, s59, 25 1106; GCN-NEXT: v_writelane_b32 v40, s60, 26 1107; GCN-NEXT: v_writelane_b32 v40, s61, 27 1108; GCN-NEXT: v_writelane_b32 v40, s62, 28 1109; GCN-NEXT: v_writelane_b32 v40, s63, 29 1110; GCN-NEXT: s_mov_b64 s[6:7], s[30:31] 1111; GCN-NEXT: s_mov_b64 s[8:9], exec 1112; GCN-NEXT: s_movk_i32 s4, 0x7b 1113; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 1114; GCN-NEXT: v_readfirstlane_b32 s12, v0 1115; GCN-NEXT: v_readfirstlane_b32 s13, v1 1116; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1] 1117; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc 1118; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13] 1119; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 1120; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] 1121; GCN-NEXT: s_cbranch_execnz 
.LBB6_1 1122; GCN-NEXT: ; %bb.2: 1123; GCN-NEXT: s_mov_b64 exec, s[8:9] 1124; GCN-NEXT: v_readlane_b32 s63, v40, 29 1125; GCN-NEXT: v_readlane_b32 s62, v40, 28 1126; GCN-NEXT: v_readlane_b32 s61, v40, 27 1127; GCN-NEXT: v_readlane_b32 s60, v40, 26 1128; GCN-NEXT: v_readlane_b32 s59, v40, 25 1129; GCN-NEXT: v_readlane_b32 s58, v40, 24 1130; GCN-NEXT: v_readlane_b32 s57, v40, 23 1131; GCN-NEXT: v_readlane_b32 s56, v40, 22 1132; GCN-NEXT: v_readlane_b32 s55, v40, 21 1133; GCN-NEXT: v_readlane_b32 s54, v40, 20 1134; GCN-NEXT: v_readlane_b32 s53, v40, 19 1135; GCN-NEXT: v_readlane_b32 s52, v40, 18 1136; GCN-NEXT: v_readlane_b32 s51, v40, 17 1137; GCN-NEXT: v_readlane_b32 s50, v40, 16 1138; GCN-NEXT: v_readlane_b32 s49, v40, 15 1139; GCN-NEXT: v_readlane_b32 s48, v40, 14 1140; GCN-NEXT: v_readlane_b32 s47, v40, 13 1141; GCN-NEXT: v_readlane_b32 s46, v40, 12 1142; GCN-NEXT: v_readlane_b32 s45, v40, 11 1143; GCN-NEXT: v_readlane_b32 s44, v40, 10 1144; GCN-NEXT: v_readlane_b32 s43, v40, 9 1145; GCN-NEXT: v_readlane_b32 s42, v40, 8 1146; GCN-NEXT: v_readlane_b32 s41, v40, 7 1147; GCN-NEXT: v_readlane_b32 s40, v40, 6 1148; GCN-NEXT: v_readlane_b32 s39, v40, 5 1149; GCN-NEXT: v_readlane_b32 s38, v40, 4 1150; GCN-NEXT: v_readlane_b32 s37, v40, 3 1151; GCN-NEXT: v_readlane_b32 s36, v40, 2 1152; GCN-NEXT: v_readlane_b32 s35, v40, 1 1153; GCN-NEXT: v_readlane_b32 s34, v40, 0 1154; GCN-NEXT: s_addk_i32 s32, 0xfc00 1155; GCN-NEXT: v_readlane_b32 s33, v40, 30 1156; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1157; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1158; GCN-NEXT: s_mov_b64 exec, s[4:5] 1159; GCN-NEXT: s_waitcnt vmcnt(0) 1160; GCN-NEXT: s_setpc_b64 s[6:7] 1161; 1162; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: 1163; GISEL: ; %bb.0: 1164; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1165; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1166; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1167; GISEL-NEXT: s_mov_b64 
exec, s[4:5] 1168; GISEL-NEXT: v_writelane_b32 v40, s33, 30 1169; GISEL-NEXT: s_mov_b32 s33, s32 1170; GISEL-NEXT: s_addk_i32 s32, 0x400 1171; GISEL-NEXT: v_writelane_b32 v40, s34, 0 1172; GISEL-NEXT: v_writelane_b32 v40, s35, 1 1173; GISEL-NEXT: v_writelane_b32 v40, s36, 2 1174; GISEL-NEXT: v_writelane_b32 v40, s37, 3 1175; GISEL-NEXT: v_writelane_b32 v40, s38, 4 1176; GISEL-NEXT: v_writelane_b32 v40, s39, 5 1177; GISEL-NEXT: v_writelane_b32 v40, s40, 6 1178; GISEL-NEXT: v_writelane_b32 v40, s41, 7 1179; GISEL-NEXT: v_writelane_b32 v40, s42, 8 1180; GISEL-NEXT: v_writelane_b32 v40, s43, 9 1181; GISEL-NEXT: v_writelane_b32 v40, s44, 10 1182; GISEL-NEXT: v_writelane_b32 v40, s45, 11 1183; GISEL-NEXT: v_writelane_b32 v40, s46, 12 1184; GISEL-NEXT: v_writelane_b32 v40, s47, 13 1185; GISEL-NEXT: v_writelane_b32 v40, s48, 14 1186; GISEL-NEXT: v_writelane_b32 v40, s49, 15 1187; GISEL-NEXT: v_writelane_b32 v40, s50, 16 1188; GISEL-NEXT: v_writelane_b32 v40, s51, 17 1189; GISEL-NEXT: v_writelane_b32 v40, s52, 18 1190; GISEL-NEXT: v_writelane_b32 v40, s53, 19 1191; GISEL-NEXT: v_writelane_b32 v40, s54, 20 1192; GISEL-NEXT: v_writelane_b32 v40, s55, 21 1193; GISEL-NEXT: v_writelane_b32 v40, s56, 22 1194; GISEL-NEXT: v_writelane_b32 v40, s57, 23 1195; GISEL-NEXT: v_writelane_b32 v40, s58, 24 1196; GISEL-NEXT: v_writelane_b32 v40, s59, 25 1197; GISEL-NEXT: v_writelane_b32 v40, s60, 26 1198; GISEL-NEXT: v_writelane_b32 v40, s61, 27 1199; GISEL-NEXT: v_writelane_b32 v40, s62, 28 1200; GISEL-NEXT: v_writelane_b32 v40, s63, 29 1201; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31] 1202; GISEL-NEXT: s_mov_b64 s[8:9], exec 1203; GISEL-NEXT: s_movk_i32 s4, 0x7b 1204; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 1205; GISEL-NEXT: v_readfirstlane_b32 s10, v0 1206; GISEL-NEXT: v_readfirstlane_b32 s11, v1 1207; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] 1208; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc 1209; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11] 1210; GISEL-NEXT: ; 
implicit-def: $vgpr0_vgpr1 1211; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13] 1212; GISEL-NEXT: s_cbranch_execnz .LBB6_1 1213; GISEL-NEXT: ; %bb.2: 1214; GISEL-NEXT: s_mov_b64 exec, s[8:9] 1215; GISEL-NEXT: v_readlane_b32 s63, v40, 29 1216; GISEL-NEXT: v_readlane_b32 s62, v40, 28 1217; GISEL-NEXT: v_readlane_b32 s61, v40, 27 1218; GISEL-NEXT: v_readlane_b32 s60, v40, 26 1219; GISEL-NEXT: v_readlane_b32 s59, v40, 25 1220; GISEL-NEXT: v_readlane_b32 s58, v40, 24 1221; GISEL-NEXT: v_readlane_b32 s57, v40, 23 1222; GISEL-NEXT: v_readlane_b32 s56, v40, 22 1223; GISEL-NEXT: v_readlane_b32 s55, v40, 21 1224; GISEL-NEXT: v_readlane_b32 s54, v40, 20 1225; GISEL-NEXT: v_readlane_b32 s53, v40, 19 1226; GISEL-NEXT: v_readlane_b32 s52, v40, 18 1227; GISEL-NEXT: v_readlane_b32 s51, v40, 17 1228; GISEL-NEXT: v_readlane_b32 s50, v40, 16 1229; GISEL-NEXT: v_readlane_b32 s49, v40, 15 1230; GISEL-NEXT: v_readlane_b32 s48, v40, 14 1231; GISEL-NEXT: v_readlane_b32 s47, v40, 13 1232; GISEL-NEXT: v_readlane_b32 s46, v40, 12 1233; GISEL-NEXT: v_readlane_b32 s45, v40, 11 1234; GISEL-NEXT: v_readlane_b32 s44, v40, 10 1235; GISEL-NEXT: v_readlane_b32 s43, v40, 9 1236; GISEL-NEXT: v_readlane_b32 s42, v40, 8 1237; GISEL-NEXT: v_readlane_b32 s41, v40, 7 1238; GISEL-NEXT: v_readlane_b32 s40, v40, 6 1239; GISEL-NEXT: v_readlane_b32 s39, v40, 5 1240; GISEL-NEXT: v_readlane_b32 s38, v40, 4 1241; GISEL-NEXT: v_readlane_b32 s37, v40, 3 1242; GISEL-NEXT: v_readlane_b32 s36, v40, 2 1243; GISEL-NEXT: v_readlane_b32 s35, v40, 1 1244; GISEL-NEXT: v_readlane_b32 s34, v40, 0 1245; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1246; GISEL-NEXT: v_readlane_b32 s33, v40, 30 1247; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1248; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1249; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1250; GISEL-NEXT: s_waitcnt vmcnt(0) 1251; GISEL-NEXT: s_setpc_b64 s[6:7] 1252 call amdgpu_gfx void %fptr(i32 inreg 123) 1253 ret void 1254} 1255 1256define i32 
@test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) { 1257; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: 1258; GCN: ; %bb.0: 1259; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1260; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 1261; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1262; GCN-NEXT: s_mov_b64 exec, s[4:5] 1263; GCN-NEXT: v_writelane_b32 v40, s33, 30 1264; GCN-NEXT: s_mov_b32 s33, s32 1265; GCN-NEXT: s_addk_i32 s32, 0x400 1266; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill 1267; GCN-NEXT: v_writelane_b32 v40, s34, 0 1268; GCN-NEXT: v_writelane_b32 v40, s35, 1 1269; GCN-NEXT: v_writelane_b32 v40, s36, 2 1270; GCN-NEXT: v_writelane_b32 v40, s37, 3 1271; GCN-NEXT: v_writelane_b32 v40, s38, 4 1272; GCN-NEXT: v_writelane_b32 v40, s39, 5 1273; GCN-NEXT: v_writelane_b32 v40, s40, 6 1274; GCN-NEXT: v_writelane_b32 v40, s41, 7 1275; GCN-NEXT: v_writelane_b32 v40, s42, 8 1276; GCN-NEXT: v_writelane_b32 v40, s43, 9 1277; GCN-NEXT: v_writelane_b32 v40, s44, 10 1278; GCN-NEXT: v_writelane_b32 v40, s45, 11 1279; GCN-NEXT: v_writelane_b32 v40, s46, 12 1280; GCN-NEXT: v_writelane_b32 v40, s47, 13 1281; GCN-NEXT: v_writelane_b32 v40, s48, 14 1282; GCN-NEXT: v_writelane_b32 v40, s49, 15 1283; GCN-NEXT: v_writelane_b32 v40, s50, 16 1284; GCN-NEXT: v_writelane_b32 v40, s51, 17 1285; GCN-NEXT: v_writelane_b32 v40, s52, 18 1286; GCN-NEXT: v_writelane_b32 v40, s53, 19 1287; GCN-NEXT: v_writelane_b32 v40, s54, 20 1288; GCN-NEXT: v_writelane_b32 v40, s55, 21 1289; GCN-NEXT: v_writelane_b32 v40, s56, 22 1290; GCN-NEXT: v_writelane_b32 v40, s57, 23 1291; GCN-NEXT: v_writelane_b32 v40, s58, 24 1292; GCN-NEXT: v_writelane_b32 v40, s59, 25 1293; GCN-NEXT: v_writelane_b32 v40, s60, 26 1294; GCN-NEXT: v_writelane_b32 v40, s61, 27 1295; GCN-NEXT: v_writelane_b32 v40, s62, 28 1296; GCN-NEXT: v_writelane_b32 v40, s63, 29 1297; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] 1298; GCN-NEXT: v_mov_b32_e32 v41, v0 
; @test_indirect_call_vgpr_ptr_arg_and_reuse: makes an amdgpu_gfx call
; through the void(i32)* argument %fptr with %i as the argument, then
; returns %i.
; Expected code: v0 is copied to v41 before the loop .LBB7_1 (v41 itself is
; spilled to and reloaded from the stack slot addressed by s33); v0 is
; rewritten from v41 inside the loop before s_swappc_b64 and once more after
; the loop. The call target is read from v[1:2]. s[30:31] is kept in s[4:5]
; for the final s_setpc_b64. The prefixes differ in the order of the two
; copies made before the loop and in the SGPR pairs used inside it.
1299; GCN-NEXT: s_mov_b64 s[6:7], exec 1300; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 1301; GCN-NEXT: v_readfirstlane_b32 s10, v1 1302; GCN-NEXT: v_readfirstlane_b32 s11, v2 1303; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] 1304; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc 1305; GCN-NEXT: v_mov_b32_e32 v0, v41 1306; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] 1307; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 1308; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] 1309; GCN-NEXT: s_cbranch_execnz .LBB7_1 1310; GCN-NEXT: ; %bb.2: 1311; GCN-NEXT: s_mov_b64 exec, s[6:7] 1312; GCN-NEXT: v_mov_b32_e32 v0, v41 1313; GCN-NEXT: v_readlane_b32 s63, v40, 29 1314; GCN-NEXT: v_readlane_b32 s62, v40, 28 1315; GCN-NEXT: v_readlane_b32 s61, v40, 27 1316; GCN-NEXT: v_readlane_b32 s60, v40, 26 1317; GCN-NEXT: v_readlane_b32 s59, v40, 25 1318; GCN-NEXT: v_readlane_b32 s58, v40, 24 1319; GCN-NEXT: v_readlane_b32 s57, v40, 23 1320; GCN-NEXT: v_readlane_b32 s56, v40, 22 1321; GCN-NEXT: v_readlane_b32 s55, v40, 21 1322; GCN-NEXT: v_readlane_b32 s54, v40, 20 1323; GCN-NEXT: v_readlane_b32 s53, v40, 19 1324; GCN-NEXT: v_readlane_b32 s52, v40, 18 1325; GCN-NEXT: v_readlane_b32 s51, v40, 17 1326; GCN-NEXT: v_readlane_b32 s50, v40, 16 1327; GCN-NEXT: v_readlane_b32 s49, v40, 15 1328; GCN-NEXT: v_readlane_b32 s48, v40, 14 1329; GCN-NEXT: v_readlane_b32 s47, v40, 13 1330; GCN-NEXT: v_readlane_b32 s46, v40, 12 1331; GCN-NEXT: v_readlane_b32 s45, v40, 11 1332; GCN-NEXT: v_readlane_b32 s44, v40, 10 1333; GCN-NEXT: v_readlane_b32 s43, v40, 9 1334; GCN-NEXT: v_readlane_b32 s42, v40, 8 1335; GCN-NEXT: v_readlane_b32 s41, v40, 7 1336; GCN-NEXT: v_readlane_b32 s40, v40, 6 1337; GCN-NEXT: v_readlane_b32 s39, v40, 5 1338; GCN-NEXT: v_readlane_b32 s38, v40, 4 1339; GCN-NEXT: v_readlane_b32 s37, v40, 3 1340; GCN-NEXT: v_readlane_b32 s36, v40, 2 1341; GCN-NEXT: v_readlane_b32 s35, v40, 1 1342; GCN-NEXT: v_readlane_b32 s34, v40, 0 1343; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload 
1344; GCN-NEXT: s_addk_i32 s32, 0xfc00 1345; GCN-NEXT: v_readlane_b32 s33, v40, 30 1346; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 1347; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1348; GCN-NEXT: s_mov_b64 exec, s[6:7] 1349; GCN-NEXT: s_waitcnt vmcnt(0) 1350; GCN-NEXT: s_setpc_b64 s[4:5] 1351; 1352; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: 1353; GISEL: ; %bb.0: 1354; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1355; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 1356; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1357; GISEL-NEXT: s_mov_b64 exec, s[4:5] 1358; GISEL-NEXT: v_writelane_b32 v40, s33, 30 1359; GISEL-NEXT: s_mov_b32 s33, s32 1360; GISEL-NEXT: s_addk_i32 s32, 0x400 1361; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill 1362; GISEL-NEXT: v_writelane_b32 v40, s34, 0 1363; GISEL-NEXT: v_writelane_b32 v40, s35, 1 1364; GISEL-NEXT: v_writelane_b32 v40, s36, 2 1365; GISEL-NEXT: v_writelane_b32 v40, s37, 3 1366; GISEL-NEXT: v_writelane_b32 v40, s38, 4 1367; GISEL-NEXT: v_writelane_b32 v40, s39, 5 1368; GISEL-NEXT: v_writelane_b32 v40, s40, 6 1369; GISEL-NEXT: v_writelane_b32 v40, s41, 7 1370; GISEL-NEXT: v_writelane_b32 v40, s42, 8 1371; GISEL-NEXT: v_writelane_b32 v40, s43, 9 1372; GISEL-NEXT: v_writelane_b32 v40, s44, 10 1373; GISEL-NEXT: v_writelane_b32 v40, s45, 11 1374; GISEL-NEXT: v_writelane_b32 v40, s46, 12 1375; GISEL-NEXT: v_writelane_b32 v40, s47, 13 1376; GISEL-NEXT: v_writelane_b32 v40, s48, 14 1377; GISEL-NEXT: v_writelane_b32 v40, s49, 15 1378; GISEL-NEXT: v_writelane_b32 v40, s50, 16 1379; GISEL-NEXT: v_writelane_b32 v40, s51, 17 1380; GISEL-NEXT: v_writelane_b32 v40, s52, 18 1381; GISEL-NEXT: v_writelane_b32 v40, s53, 19 1382; GISEL-NEXT: v_writelane_b32 v40, s54, 20 1383; GISEL-NEXT: v_writelane_b32 v40, s55, 21 1384; GISEL-NEXT: v_writelane_b32 v40, s56, 22 1385; GISEL-NEXT: v_writelane_b32 v40, s57, 23 1386; GISEL-NEXT: 
v_writelane_b32 v40, s58, 24 1387; GISEL-NEXT: v_writelane_b32 v40, s59, 25 1388; GISEL-NEXT: v_writelane_b32 v40, s60, 26 1389; GISEL-NEXT: v_writelane_b32 v40, s61, 27 1390; GISEL-NEXT: v_writelane_b32 v40, s62, 28 1391; GISEL-NEXT: v_writelane_b32 v40, s63, 29 1392; GISEL-NEXT: v_mov_b32_e32 v41, v0 1393; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] 1394; GISEL-NEXT: s_mov_b64 s[6:7], exec 1395; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 1396; GISEL-NEXT: v_readfirstlane_b32 s8, v1 1397; GISEL-NEXT: v_readfirstlane_b32 s9, v2 1398; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] 1399; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc 1400; GISEL-NEXT: v_mov_b32_e32 v0, v41 1401; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] 1402; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 1403; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] 1404; GISEL-NEXT: s_cbranch_execnz .LBB7_1 1405; GISEL-NEXT: ; %bb.2: 1406; GISEL-NEXT: s_mov_b64 exec, s[6:7] 1407; GISEL-NEXT: v_mov_b32_e32 v0, v41 1408; GISEL-NEXT: v_readlane_b32 s63, v40, 29 1409; GISEL-NEXT: v_readlane_b32 s62, v40, 28 1410; GISEL-NEXT: v_readlane_b32 s61, v40, 27 1411; GISEL-NEXT: v_readlane_b32 s60, v40, 26 1412; GISEL-NEXT: v_readlane_b32 s59, v40, 25 1413; GISEL-NEXT: v_readlane_b32 s58, v40, 24 1414; GISEL-NEXT: v_readlane_b32 s57, v40, 23 1415; GISEL-NEXT: v_readlane_b32 s56, v40, 22 1416; GISEL-NEXT: v_readlane_b32 s55, v40, 21 1417; GISEL-NEXT: v_readlane_b32 s54, v40, 20 1418; GISEL-NEXT: v_readlane_b32 s53, v40, 19 1419; GISEL-NEXT: v_readlane_b32 s52, v40, 18 1420; GISEL-NEXT: v_readlane_b32 s51, v40, 17 1421; GISEL-NEXT: v_readlane_b32 s50, v40, 16 1422; GISEL-NEXT: v_readlane_b32 s49, v40, 15 1423; GISEL-NEXT: v_readlane_b32 s48, v40, 14 1424; GISEL-NEXT: v_readlane_b32 s47, v40, 13 1425; GISEL-NEXT: v_readlane_b32 s46, v40, 12 1426; GISEL-NEXT: v_readlane_b32 s45, v40, 11 1427; GISEL-NEXT: v_readlane_b32 s44, v40, 10 1428; GISEL-NEXT: v_readlane_b32 s43, v40, 9 1429; GISEL-NEXT: v_readlane_b32 s42, v40, 8 1430; 
GISEL-NEXT: v_readlane_b32 s41, v40, 7 1431; GISEL-NEXT: v_readlane_b32 s40, v40, 6 1432; GISEL-NEXT: v_readlane_b32 s39, v40, 5 1433; GISEL-NEXT: v_readlane_b32 s38, v40, 4 1434; GISEL-NEXT: v_readlane_b32 s37, v40, 3 1435; GISEL-NEXT: v_readlane_b32 s36, v40, 2 1436; GISEL-NEXT: v_readlane_b32 s35, v40, 1 1437; GISEL-NEXT: v_readlane_b32 s34, v40, 0 1438; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload 1439; GISEL-NEXT: s_addk_i32 s32, 0xfc00 1440; GISEL-NEXT: v_readlane_b32 s33, v40, 30 1441; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 1442; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1443; GISEL-NEXT: s_mov_b64 exec, s[6:7] 1444; GISEL-NEXT: s_waitcnt vmcnt(0) 1445; GISEL-NEXT: s_setpc_b64 s[4:5] 1446 call amdgpu_gfx void %fptr(i32 %i) 1447 ret i32 %i 1448} 1449 1450; Use a variable inside a waterfall loop and use the return variable after the loop. 1451; TODO The argument and return variable could be in the same physical register, but the register 1452; allocator is not able to do that because the return value clashes with the liverange of an 1453; IMPLICIT_DEF of the argument. 
; Indirect call through a function pointer that arrives in VGPRs, forwarding an
; i32 argument and returning the callee's i32 result. The expected code loops:
; it picks one pointer value with v_readfirstlane_b32, limits exec to the lanes
; holding that same value, calls it with s_swappc_b64, and repeats while any
; lanes remain (s_cbranch_execnz). The result is carried out of the loop in v3.
define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) {
; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_writelane_b32 v40, s33, 30
; GCN-NEXT:    s_mov_b32 s33, s32
; GCN-NEXT:    s_addk_i32 s32, 0x400
; GCN-NEXT:    v_writelane_b32 v40, s34, 0
; GCN-NEXT:    v_writelane_b32 v40, s35, 1
; GCN-NEXT:    v_writelane_b32 v40, s36, 2
; GCN-NEXT:    v_writelane_b32 v40, s37, 3
; GCN-NEXT:    v_writelane_b32 v40, s38, 4
; GCN-NEXT:    v_writelane_b32 v40, s39, 5
; GCN-NEXT:    v_writelane_b32 v40, s40, 6
; GCN-NEXT:    v_writelane_b32 v40, s41, 7
; GCN-NEXT:    v_writelane_b32 v40, s42, 8
; GCN-NEXT:    v_writelane_b32 v40, s43, 9
; GCN-NEXT:    v_writelane_b32 v40, s44, 10
; GCN-NEXT:    v_writelane_b32 v40, s45, 11
; GCN-NEXT:    v_writelane_b32 v40, s46, 12
; GCN-NEXT:    v_writelane_b32 v40, s47, 13
; GCN-NEXT:    v_writelane_b32 v40, s48, 14
; GCN-NEXT:    v_writelane_b32 v40, s49, 15
; GCN-NEXT:    v_writelane_b32 v40, s50, 16
; GCN-NEXT:    v_writelane_b32 v40, s51, 17
; GCN-NEXT:    v_writelane_b32 v40, s52, 18
; GCN-NEXT:    v_writelane_b32 v40, s53, 19
; GCN-NEXT:    v_writelane_b32 v40, s54, 20
; GCN-NEXT:    v_writelane_b32 v40, s55, 21
; GCN-NEXT:    v_writelane_b32 v40, s56, 22
; GCN-NEXT:    v_writelane_b32 v40, s57, 23
; GCN-NEXT:    v_writelane_b32 v40, s58, 24
; GCN-NEXT:    v_writelane_b32 v40, s59, 25
; GCN-NEXT:    v_writelane_b32 v40, s60, 26
; GCN-NEXT:    v_writelane_b32 v40, s61, 27
; GCN-NEXT:    v_writelane_b32 v40, s62, 28
; GCN-NEXT:    v_writelane_b32 v40, s63, 29
; GCN-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GCN-NEXT:    s_mov_b64 s[6:7], exec
; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_readfirstlane_b32 s10, v1
; GCN-NEXT:    v_readfirstlane_b32 s11, v2
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT:    s_swappc_b64 s[30:31], s[10:11]
; GCN-NEXT:    v_mov_b32_e32 v3, v0
; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_cbranch_execnz .LBB8_1
; GCN-NEXT:  ; %bb.2:
; GCN-NEXT:    s_mov_b64 exec, s[6:7]
; GCN-NEXT:    v_mov_b32_e32 v0, v3
; GCN-NEXT:    v_readlane_b32 s63, v40, 29
; GCN-NEXT:    v_readlane_b32 s62, v40, 28
; GCN-NEXT:    v_readlane_b32 s61, v40, 27
; GCN-NEXT:    v_readlane_b32 s60, v40, 26
; GCN-NEXT:    v_readlane_b32 s59, v40, 25
; GCN-NEXT:    v_readlane_b32 s58, v40, 24
; GCN-NEXT:    v_readlane_b32 s57, v40, 23
; GCN-NEXT:    v_readlane_b32 s56, v40, 22
; GCN-NEXT:    v_readlane_b32 s55, v40, 21
; GCN-NEXT:    v_readlane_b32 s54, v40, 20
; GCN-NEXT:    v_readlane_b32 s53, v40, 19
; GCN-NEXT:    v_readlane_b32 s52, v40, 18
; GCN-NEXT:    v_readlane_b32 s51, v40, 17
; GCN-NEXT:    v_readlane_b32 s50, v40, 16
; GCN-NEXT:    v_readlane_b32 s49, v40, 15
; GCN-NEXT:    v_readlane_b32 s48, v40, 14
; GCN-NEXT:    v_readlane_b32 s47, v40, 13
; GCN-NEXT:    v_readlane_b32 s46, v40, 12
; GCN-NEXT:    v_readlane_b32 s45, v40, 11
; GCN-NEXT:    v_readlane_b32 s44, v40, 10
; GCN-NEXT:    v_readlane_b32 s43, v40, 9
; GCN-NEXT:    v_readlane_b32 s42, v40, 8
; GCN-NEXT:    v_readlane_b32 s41, v40, 7
; GCN-NEXT:    v_readlane_b32 s40, v40, 6
; GCN-NEXT:    v_readlane_b32 s39, v40, 5
; GCN-NEXT:    v_readlane_b32 s38, v40, 4
; GCN-NEXT:    v_readlane_b32 s37, v40, 3
; GCN-NEXT:    v_readlane_b32 s36, v40, 2
; GCN-NEXT:    v_readlane_b32 s35, v40, 1
; GCN-NEXT:    v_readlane_b32 s34, v40, 0
; GCN-NEXT:    s_addk_i32 s32, 0xfc00
; GCN-NEXT:    v_readlane_b32 s33, v40, 30
; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT:    s_mov_b64 exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
; GISEL-NEXT:    s_mov_b32 s33, s32
; GISEL-NEXT:    s_addk_i32 s32, 0x400
; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
; GISEL-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GISEL-NEXT:    s_mov_b64 s[6:7], exec
; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT:    v_mov_b32_e32 v3, v0
; GISEL-NEXT:    ; implicit-def: $vgpr1_vgpr2
; GISEL-NEXT:    ; implicit-def: $vgpr0
; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
; GISEL-NEXT:  ; %bb.2:
; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
; GISEL-NEXT:    v_mov_b32_e32 v0, v3
; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[4:5]
  %ret = call amdgpu_gfx i32 %fptr(i32 %i)
  ret i32 %ret
}

; Calling a vgpr can never be a tail call.
; The IR below requests a tail call, but the expected code still performs the
; call inside the readfirstlane/s_swappc_b64 loop and then returns to the
; caller with s_setpc_b64.
define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_writelane_b32 v40, s33, 30
; GCN-NEXT:    s_mov_b32 s33, s32
; GCN-NEXT:    s_addk_i32 s32, 0x400
; GCN-NEXT:    v_writelane_b32 v40, s34, 0
; GCN-NEXT:    v_writelane_b32 v40, s35, 1
; GCN-NEXT:    v_writelane_b32 v40, s36, 2
; GCN-NEXT:    v_writelane_b32 v40, s37, 3
; GCN-NEXT:    v_writelane_b32 v40, s38, 4
; GCN-NEXT:    v_writelane_b32 v40, s39, 5
; GCN-NEXT:    v_writelane_b32 v40, s40, 6
; GCN-NEXT:    v_writelane_b32 v40, s41, 7
; GCN-NEXT:    v_writelane_b32 v40, s42, 8
; GCN-NEXT:    v_writelane_b32 v40, s43, 9
; GCN-NEXT:    v_writelane_b32 v40, s44, 10
; GCN-NEXT:    v_writelane_b32 v40, s45, 11
; GCN-NEXT:    v_writelane_b32 v40, s46, 12
; GCN-NEXT:    v_writelane_b32 v40, s47, 13
; GCN-NEXT:    v_writelane_b32 v40, s48, 14
; GCN-NEXT:    v_writelane_b32 v40, s49, 15
; GCN-NEXT:    v_writelane_b32 v40, s50, 16
; GCN-NEXT:    v_writelane_b32 v40, s51, 17
; GCN-NEXT:    v_writelane_b32 v40, s52, 18
; GCN-NEXT:    v_writelane_b32 v40, s53, 19
; GCN-NEXT:    v_writelane_b32 v40, s54, 20
; GCN-NEXT:    v_writelane_b32 v40, s55, 21
; GCN-NEXT:    v_writelane_b32 v40, s56, 22
; GCN-NEXT:    v_writelane_b32 v40, s57, 23
; GCN-NEXT:    v_writelane_b32 v40, s58, 24
; GCN-NEXT:    v_writelane_b32 v40, s59, 25
; GCN-NEXT:    v_writelane_b32 v40, s60, 26
; GCN-NEXT:    v_writelane_b32 v40, s61, 27
; GCN-NEXT:    v_writelane_b32 v40, s62, 28
; GCN-NEXT:    v_writelane_b32 v40, s63, 29
; GCN-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GCN-NEXT:    s_mov_b64 s[6:7], exec
; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_readfirstlane_b32 s10, v0
; GCN-NEXT:    v_readfirstlane_b32 s11, v1
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT:    s_swappc_b64 s[30:31], s[10:11]
; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_cbranch_execnz .LBB9_1
; GCN-NEXT:  ; %bb.2:
; GCN-NEXT:    s_mov_b64 exec, s[6:7]
; GCN-NEXT:    v_readlane_b32 s63, v40, 29
; GCN-NEXT:    v_readlane_b32 s62, v40, 28
; GCN-NEXT:    v_readlane_b32 s61, v40, 27
; GCN-NEXT:    v_readlane_b32 s60, v40, 26
; GCN-NEXT:    v_readlane_b32 s59, v40, 25
; GCN-NEXT:    v_readlane_b32 s58, v40, 24
; GCN-NEXT:    v_readlane_b32 s57, v40, 23
; GCN-NEXT:    v_readlane_b32 s56, v40, 22
; GCN-NEXT:    v_readlane_b32 s55, v40, 21
; GCN-NEXT:    v_readlane_b32 s54, v40, 20
; GCN-NEXT:    v_readlane_b32 s53, v40, 19
; GCN-NEXT:    v_readlane_b32 s52, v40, 18
; GCN-NEXT:    v_readlane_b32 s51, v40, 17
; GCN-NEXT:    v_readlane_b32 s50, v40, 16
; GCN-NEXT:    v_readlane_b32 s49, v40, 15
; GCN-NEXT:    v_readlane_b32 s48, v40, 14
; GCN-NEXT:    v_readlane_b32 s47, v40, 13
; GCN-NEXT:    v_readlane_b32 s46, v40, 12
; GCN-NEXT:    v_readlane_b32 s45, v40, 11
; GCN-NEXT:    v_readlane_b32 s44, v40, 10
; GCN-NEXT:    v_readlane_b32 s43, v40, 9
; GCN-NEXT:    v_readlane_b32 s42, v40, 8
; GCN-NEXT:    v_readlane_b32 s41, v40, 7
; GCN-NEXT:    v_readlane_b32 s40, v40, 6
; GCN-NEXT:    v_readlane_b32 s39, v40, 5
; GCN-NEXT:    v_readlane_b32 s38, v40, 4
; GCN-NEXT:    v_readlane_b32 s37, v40, 3
; GCN-NEXT:    v_readlane_b32 s36, v40, 2
; GCN-NEXT:    v_readlane_b32 s35, v40, 1
; GCN-NEXT:    v_readlane_b32 s34, v40, 0
; GCN-NEXT:    s_addk_i32 s32, 0xfc00
; GCN-NEXT:    v_readlane_b32 s33, v40, 30
; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT:    s_mov_b64 exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
; GISEL-NEXT:    v_writelane_b32 v40, s33, 30
; GISEL-NEXT:    s_mov_b32 s33, s32
; GISEL-NEXT:    s_addk_i32 s32, 0x400
; GISEL-NEXT:    v_writelane_b32 v40, s34, 0
; GISEL-NEXT:    v_writelane_b32 v40, s35, 1
; GISEL-NEXT:    v_writelane_b32 v40, s36, 2
; GISEL-NEXT:    v_writelane_b32 v40, s37, 3
; GISEL-NEXT:    v_writelane_b32 v40, s38, 4
; GISEL-NEXT:    v_writelane_b32 v40, s39, 5
; GISEL-NEXT:    v_writelane_b32 v40, s40, 6
; GISEL-NEXT:    v_writelane_b32 v40, s41, 7
; GISEL-NEXT:    v_writelane_b32 v40, s42, 8
; GISEL-NEXT:    v_writelane_b32 v40, s43, 9
; GISEL-NEXT:    v_writelane_b32 v40, s44, 10
; GISEL-NEXT:    v_writelane_b32 v40, s45, 11
; GISEL-NEXT:    v_writelane_b32 v40, s46, 12
; GISEL-NEXT:    v_writelane_b32 v40, s47, 13
; GISEL-NEXT:    v_writelane_b32 v40, s48, 14
; GISEL-NEXT:    v_writelane_b32 v40, s49, 15
; GISEL-NEXT:    v_writelane_b32 v40, s50, 16
; GISEL-NEXT:    v_writelane_b32 v40, s51, 17
; GISEL-NEXT:    v_writelane_b32 v40, s52, 18
; GISEL-NEXT:    v_writelane_b32 v40, s53, 19
; GISEL-NEXT:    v_writelane_b32 v40, s54, 20
; GISEL-NEXT:    v_writelane_b32 v40, s55, 21
; GISEL-NEXT:    v_writelane_b32 v40, s56, 22
; GISEL-NEXT:    v_writelane_b32 v40, s57, 23
; GISEL-NEXT:    v_writelane_b32 v40, s58, 24
; GISEL-NEXT:    v_writelane_b32 v40, s59, 25
; GISEL-NEXT:    v_writelane_b32 v40, s60, 26
; GISEL-NEXT:    v_writelane_b32 v40, s61, 27
; GISEL-NEXT:    v_writelane_b32 v40, s62, 28
; GISEL-NEXT:    v_writelane_b32 v40, s63, 29
; GISEL-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GISEL-NEXT:    s_mov_b64 s[6:7], exec
; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
; GISEL-NEXT:  ; %bb.2:
; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
; GISEL-NEXT:    v_readlane_b32 s63, v40, 29
; GISEL-NEXT:    v_readlane_b32 s62, v40, 28
; GISEL-NEXT:    v_readlane_b32 s61, v40, 27
; GISEL-NEXT:    v_readlane_b32 s60, v40, 26
; GISEL-NEXT:    v_readlane_b32 s59, v40, 25
; GISEL-NEXT:    v_readlane_b32 s58, v40, 24
; GISEL-NEXT:    v_readlane_b32 s57, v40, 23
; GISEL-NEXT:    v_readlane_b32 s56, v40, 22
; GISEL-NEXT:    v_readlane_b32 s55, v40, 21
; GISEL-NEXT:    v_readlane_b32 s54, v40, 20
; GISEL-NEXT:    v_readlane_b32 s53, v40, 19
; GISEL-NEXT:    v_readlane_b32 s52, v40, 18
; GISEL-NEXT:    v_readlane_b32 s51, v40, 17
; GISEL-NEXT:    v_readlane_b32 s50, v40, 16
; GISEL-NEXT:    v_readlane_b32 s49, v40, 15
; GISEL-NEXT:    v_readlane_b32 s48, v40, 14
; GISEL-NEXT:    v_readlane_b32 s47, v40, 13
; GISEL-NEXT:    v_readlane_b32 s46, v40, 12
; GISEL-NEXT:    v_readlane_b32 s45, v40, 11
; GISEL-NEXT:    v_readlane_b32 s44, v40, 10
; GISEL-NEXT:    v_readlane_b32 s43, v40, 9
; GISEL-NEXT:    v_readlane_b32 s42, v40, 8
; GISEL-NEXT:    v_readlane_b32 s41, v40, 7
; GISEL-NEXT:    v_readlane_b32 s40, v40, 6
; GISEL-NEXT:    v_readlane_b32 s39, v40, 5
; GISEL-NEXT:    v_readlane_b32 s38, v40, 4
; GISEL-NEXT:    v_readlane_b32 s37, v40, 3
; GISEL-NEXT:    v_readlane_b32 s36, v40, 2
; GISEL-NEXT:    v_readlane_b32 s35, v40, 1
; GISEL-NEXT:    v_readlane_b32 s34, v40, 0
; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
; GISEL-NEXT:    v_readlane_b32 s33, v40, 30
; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[4:5]
  tail call amdgpu_gfx void %fptr()
  ret void
}