1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s 3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s 4; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s 5; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s 6 7declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0 8declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0 9declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0 10 11declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0 12declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0 13declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0 14 15declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0 16declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0 17declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0 18 19declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0 20declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0 21declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0 22declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0 23declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0 24 25declare hidden amdgpu_gfx void @external_void_func_f16(half) #0 26declare hidden amdgpu_gfx void @external_void_func_f32(float) #0 27declare hidden amdgpu_gfx void @external_void_func_f64(double) #0 28declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0 29declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0 30declare hidden amdgpu_gfx void 
@external_void_func_v3f32(<3 x float>) #0 31declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0 32declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0 33 34declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0 35declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0 36declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0 37declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0 38declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0 39declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0 40 41declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0 42declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0 43declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0 44declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0 45declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0 46declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0 47declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0 48declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0 49declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 50 51declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0 52declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0 53declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0 54declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0 55declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0 56declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0 57declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0 58declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0 59 60declare hidden amdgpu_gfx void 
@external_void_func_f16_inreg(half inreg) #0 61declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0 62declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0 63declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0 64declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0 65declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0 66declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0 67declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0 68 69declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0 70declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0 71declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0 72declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0 73declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0 74declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0 75 76declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0 77declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0 78declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0 79declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0 80declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0 81declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0 82declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0 83declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0 84declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0 85 86; return value and argument 
87declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0 88 89; Structs 90declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0 91declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 92declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 93 94declare hidden amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0 95 96define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { 97; GFX9-LABEL: test_call_external_void_func_i1_imm: 98; GFX9: ; %bb.0: 99; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 100; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 101; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 102; GFX9-NEXT: s_mov_b64 exec, s[34:35] 103; GFX9-NEXT: v_writelane_b32 v40, s33, 2 104; GFX9-NEXT: s_mov_b32 s33, s32 105; GFX9-NEXT: s_addk_i32 s32, 0x400 106; GFX9-NEXT: v_writelane_b32 v40, s30, 0 107; GFX9-NEXT: v_mov_b32_e32 v0, 1 108; GFX9-NEXT: v_writelane_b32 v40, s31, 1 109; GFX9-NEXT: s_getpc_b64 s[34:35] 110; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 111; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 112; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 113; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 114; GFX9-NEXT: v_readlane_b32 s31, v40, 1 115; GFX9-NEXT: v_readlane_b32 s30, v40, 0 116; GFX9-NEXT: s_addk_i32 s32, 0xfc00 117; GFX9-NEXT: v_readlane_b32 s33, v40, 2 118; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 119; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 120; GFX9-NEXT: s_mov_b64 exec, s[34:35] 121; GFX9-NEXT: s_waitcnt vmcnt(0) 122; GFX9-NEXT: s_setpc_b64 s[30:31] 123; 124; GFX10-LABEL: test_call_external_void_func_i1_imm: 125; GFX10: ; %bb.0: 126; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 127; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 128; GFX10-NEXT: s_or_saveexec_b32 s34, -1 129; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 130; GFX10-NEXT: s_waitcnt_depctr 0xffe3 131; GFX10-NEXT: s_mov_b32 exec_lo, s34 132; GFX10-NEXT: v_writelane_b32 v40, s33, 2 133; GFX10-NEXT: v_mov_b32_e32 v0, 1 134; GFX10-NEXT: s_mov_b32 s33, s32 135; GFX10-NEXT: s_addk_i32 s32, 0x200 136; GFX10-NEXT: s_getpc_b64 s[34:35] 137; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 138; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 139; GFX10-NEXT: v_writelane_b32 v40, s30, 0 140; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 141; GFX10-NEXT: v_writelane_b32 v40, s31, 1 142; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 143; GFX10-NEXT: v_readlane_b32 s31, v40, 1 144; GFX10-NEXT: v_readlane_b32 s30, v40, 0 145; GFX10-NEXT: s_addk_i32 s32, 0xfe00 146; GFX10-NEXT: v_readlane_b32 s33, v40, 2 147; GFX10-NEXT: s_or_saveexec_b32 s34, -1 148; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 149; GFX10-NEXT: s_waitcnt_depctr 0xffe3 150; GFX10-NEXT: s_mov_b32 exec_lo, s34 151; GFX10-NEXT: s_waitcnt vmcnt(0) 152; GFX10-NEXT: s_setpc_b64 s[30:31] 153; 154; GFX11-LABEL: test_call_external_void_func_i1_imm: 155; GFX11: ; %bb.0: 156; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 157; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 158; GFX11-NEXT: s_or_saveexec_b32 s0, -1 159; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 160; GFX11-NEXT: s_mov_b32 exec_lo, s0 161; GFX11-NEXT: v_writelane_b32 v40, s33, 2 162; GFX11-NEXT: v_mov_b32_e32 v0, 1 163; GFX11-NEXT: s_mov_b32 s33, s32 164; GFX11-NEXT: s_add_i32 s32, s32, 16 165; GFX11-NEXT: s_getpc_b64 s[0:1] 166; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4 167; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12 168; GFX11-NEXT: v_writelane_b32 v40, s30, 0 169; GFX11-NEXT: scratch_store_b8 off, v0, s32 170; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 171; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 172; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 173; GFX11-NEXT: v_readlane_b32 s31, v40, 1 174; GFX11-NEXT: v_readlane_b32 s30, v40, 0 175; GFX11-NEXT: s_add_i32 s32, s32, -16 176; GFX11-NEXT: v_readlane_b32 s33, v40, 2 177; GFX11-NEXT: s_or_saveexec_b32 s0, -1 178; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 179; GFX11-NEXT: s_mov_b32 exec_lo, s0 180; GFX11-NEXT: s_waitcnt vmcnt(0) 181; GFX11-NEXT: s_setpc_b64 s[30:31] 182; 183; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm: 184; GFX10-SCRATCH: ; %bb.0: 185; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 186; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 187; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 188; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 189; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 190; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 191; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 192; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 193; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 194; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 195; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 196; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4 197; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12 198; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 199; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 200; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 201; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 202; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 203; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 204; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 205; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 206; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 207; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 208; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 209; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, 
s0 210; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 211; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 212 call amdgpu_gfx void @external_void_func_i1(i1 true) 213 ret void 214} 215 216define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { 217; GFX9-LABEL: test_call_external_void_func_i1_signext: 218; GFX9: ; %bb.0: 219; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 220; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 221; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 222; GFX9-NEXT: s_mov_b64 exec, s[34:35] 223; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc 224; GFX9-NEXT: s_waitcnt vmcnt(0) 225; GFX9-NEXT: v_writelane_b32 v40, s33, 2 226; GFX9-NEXT: s_mov_b32 s33, s32 227; GFX9-NEXT: s_addk_i32 s32, 0x400 228; GFX9-NEXT: v_writelane_b32 v40, s30, 0 229; GFX9-NEXT: v_writelane_b32 v40, s31, 1 230; GFX9-NEXT: s_getpc_b64 s[34:35] 231; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 232; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 233; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 234; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 235; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 236; GFX9-NEXT: v_readlane_b32 s31, v40, 1 237; GFX9-NEXT: v_readlane_b32 s30, v40, 0 238; GFX9-NEXT: s_addk_i32 s32, 0xfc00 239; GFX9-NEXT: v_readlane_b32 s33, v40, 2 240; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 241; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 242; GFX9-NEXT: s_mov_b64 exec, s[34:35] 243; GFX9-NEXT: s_waitcnt vmcnt(0) 244; GFX9-NEXT: s_setpc_b64 s[30:31] 245; 246; GFX10-LABEL: test_call_external_void_func_i1_signext: 247; GFX10: ; %bb.0: 248; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 249; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 250; GFX10-NEXT: s_or_saveexec_b32 s34, -1 251; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 252; GFX10-NEXT: s_waitcnt_depctr 0xffe3 253; GFX10-NEXT: s_mov_b32 exec_lo, s34 254; GFX10-NEXT: 
global_load_ubyte v0, v[0:1], off glc dlc 255; GFX10-NEXT: s_waitcnt vmcnt(0) 256; GFX10-NEXT: v_writelane_b32 v40, s33, 2 257; GFX10-NEXT: s_mov_b32 s33, s32 258; GFX10-NEXT: s_addk_i32 s32, 0x200 259; GFX10-NEXT: s_getpc_b64 s[34:35] 260; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 261; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 262; GFX10-NEXT: v_writelane_b32 v40, s30, 0 263; GFX10-NEXT: v_writelane_b32 v40, s31, 1 264; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 265; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 266; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 267; GFX10-NEXT: v_readlane_b32 s31, v40, 1 268; GFX10-NEXT: v_readlane_b32 s30, v40, 0 269; GFX10-NEXT: s_addk_i32 s32, 0xfe00 270; GFX10-NEXT: v_readlane_b32 s33, v40, 2 271; GFX10-NEXT: s_or_saveexec_b32 s34, -1 272; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 273; GFX10-NEXT: s_waitcnt_depctr 0xffe3 274; GFX10-NEXT: s_mov_b32 exec_lo, s34 275; GFX10-NEXT: s_waitcnt vmcnt(0) 276; GFX10-NEXT: s_setpc_b64 s[30:31] 277; 278; GFX11-LABEL: test_call_external_void_func_i1_signext: 279; GFX11: ; %bb.0: 280; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 281; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 282; GFX11-NEXT: s_or_saveexec_b32 s0, -1 283; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 284; GFX11-NEXT: s_mov_b32 exec_lo, s0 285; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 286; GFX11-NEXT: s_waitcnt vmcnt(0) 287; GFX11-NEXT: v_writelane_b32 v40, s33, 2 288; GFX11-NEXT: s_mov_b32 s33, s32 289; GFX11-NEXT: s_add_i32 s32, s32, 16 290; GFX11-NEXT: s_getpc_b64 s[0:1] 291; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4 292; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12 293; GFX11-NEXT: v_writelane_b32 v40, s30, 0 294; GFX11-NEXT: v_writelane_b32 v40, s31, 1 295; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 296; GFX11-NEXT: scratch_store_b8 off, v0, s32 
297; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 298; GFX11-NEXT: v_readlane_b32 s31, v40, 1 299; GFX11-NEXT: v_readlane_b32 s30, v40, 0 300; GFX11-NEXT: s_add_i32 s32, s32, -16 301; GFX11-NEXT: v_readlane_b32 s33, v40, 2 302; GFX11-NEXT: s_or_saveexec_b32 s0, -1 303; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 304; GFX11-NEXT: s_mov_b32 exec_lo, s0 305; GFX11-NEXT: s_waitcnt vmcnt(0) 306; GFX11-NEXT: s_setpc_b64 s[30:31] 307; 308; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext: 309; GFX10-SCRATCH: ; %bb.0: 310; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 311; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 312; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 313; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 314; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 315; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 316; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 317; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 318; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 319; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 320; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 321; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 322; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4 323; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12 324; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 325; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 326; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 327; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 328; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 329; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 330; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 331; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 332; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 333; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 334; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 335; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 336; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 337; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 338; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 339 %var = load volatile i1, i1 addrspace(1)* undef 340 call amdgpu_gfx void @external_void_func_i1_signext(i1 signext%var) 341 ret void 342} 343 344define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { 345; GFX9-LABEL: test_call_external_void_func_i1_zeroext: 346; GFX9: ; %bb.0: 347; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 348; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 349; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 350; GFX9-NEXT: s_mov_b64 exec, s[34:35] 351; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc 352; GFX9-NEXT: s_waitcnt vmcnt(0) 353; GFX9-NEXT: v_writelane_b32 v40, s33, 2 354; GFX9-NEXT: s_mov_b32 s33, s32 355; GFX9-NEXT: s_addk_i32 s32, 0x400 356; GFX9-NEXT: v_writelane_b32 v40, s30, 0 357; GFX9-NEXT: v_writelane_b32 v40, s31, 1 358; GFX9-NEXT: s_getpc_b64 s[34:35] 359; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 360; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 361; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 362; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 363; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 364; GFX9-NEXT: v_readlane_b32 s31, v40, 1 365; GFX9-NEXT: v_readlane_b32 s30, v40, 0 366; GFX9-NEXT: s_addk_i32 s32, 0xfc00 367; GFX9-NEXT: v_readlane_b32 s33, v40, 2 368; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 369; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 370; GFX9-NEXT: s_mov_b64 exec, s[34:35] 371; GFX9-NEXT: s_waitcnt vmcnt(0) 372; GFX9-NEXT: s_setpc_b64 s[30:31] 373; 374; GFX10-LABEL: test_call_external_void_func_i1_zeroext: 375; GFX10: ; %bb.0: 376; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 378; GFX10-NEXT: s_or_saveexec_b32 s34, -1 379; GFX10-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Spill 380; GFX10-NEXT: s_waitcnt_depctr 0xffe3 381; GFX10-NEXT: s_mov_b32 exec_lo, s34 382; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 383; GFX10-NEXT: s_waitcnt vmcnt(0) 384; GFX10-NEXT: v_writelane_b32 v40, s33, 2 385; GFX10-NEXT: s_mov_b32 s33, s32 386; GFX10-NEXT: s_addk_i32 s32, 0x200 387; GFX10-NEXT: s_getpc_b64 s[34:35] 388; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 389; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 390; GFX10-NEXT: v_writelane_b32 v40, s30, 0 391; GFX10-NEXT: v_writelane_b32 v40, s31, 1 392; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 393; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 394; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 395; GFX10-NEXT: v_readlane_b32 s31, v40, 1 396; GFX10-NEXT: v_readlane_b32 s30, v40, 0 397; GFX10-NEXT: s_addk_i32 s32, 0xfe00 398; GFX10-NEXT: v_readlane_b32 s33, v40, 2 399; GFX10-NEXT: s_or_saveexec_b32 s34, -1 400; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 401; GFX10-NEXT: s_waitcnt_depctr 0xffe3 402; GFX10-NEXT: s_mov_b32 exec_lo, s34 403; GFX10-NEXT: s_waitcnt vmcnt(0) 404; GFX10-NEXT: s_setpc_b64 s[30:31] 405; 406; GFX11-LABEL: test_call_external_void_func_i1_zeroext: 407; GFX11: ; %bb.0: 408; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 409; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 410; GFX11-NEXT: s_or_saveexec_b32 s0, -1 411; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 412; GFX11-NEXT: s_mov_b32 exec_lo, s0 413; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 414; GFX11-NEXT: s_waitcnt vmcnt(0) 415; GFX11-NEXT: v_writelane_b32 v40, s33, 2 416; GFX11-NEXT: s_mov_b32 s33, s32 417; GFX11-NEXT: s_add_i32 s32, s32, 16 418; GFX11-NEXT: s_getpc_b64 s[0:1] 419; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4 420; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12 421; GFX11-NEXT: v_writelane_b32 v40, s30, 0 
422; GFX11-NEXT: v_writelane_b32 v40, s31, 1 423; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 424; GFX11-NEXT: scratch_store_b8 off, v0, s32 425; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 426; GFX11-NEXT: v_readlane_b32 s31, v40, 1 427; GFX11-NEXT: v_readlane_b32 s30, v40, 0 428; GFX11-NEXT: s_add_i32 s32, s32, -16 429; GFX11-NEXT: v_readlane_b32 s33, v40, 2 430; GFX11-NEXT: s_or_saveexec_b32 s0, -1 431; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 432; GFX11-NEXT: s_mov_b32 exec_lo, s0 433; GFX11-NEXT: s_waitcnt vmcnt(0) 434; GFX11-NEXT: s_setpc_b64 s[30:31] 435; 436; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext: 437; GFX10-SCRATCH: ; %bb.0: 438; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 439; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 440; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 441; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 442; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 443; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 444; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 445; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 446; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 447; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 448; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 449; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 450; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4 451; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12 452; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 453; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 454; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 455; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 456; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 457; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 458; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 459; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 460; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 461; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s0, -1 462; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 463; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 464; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 465; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 466; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 467 %var = load volatile i1, i1 addrspace(1)* undef 468 call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var) 469 ret void 470} 471 472define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { 473; GFX9-LABEL: test_call_external_void_func_i8_imm: 474; GFX9: ; %bb.0: 475; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 476; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 477; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 478; GFX9-NEXT: s_mov_b64 exec, s[34:35] 479; GFX9-NEXT: v_writelane_b32 v40, s33, 2 480; GFX9-NEXT: s_mov_b32 s33, s32 481; GFX9-NEXT: s_addk_i32 s32, 0x400 482; GFX9-NEXT: v_writelane_b32 v40, s30, 0 483; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 484; GFX9-NEXT: v_writelane_b32 v40, s31, 1 485; GFX9-NEXT: s_getpc_b64 s[34:35] 486; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 487; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 488; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 489; GFX9-NEXT: v_readlane_b32 s31, v40, 1 490; GFX9-NEXT: v_readlane_b32 s30, v40, 0 491; GFX9-NEXT: s_addk_i32 s32, 0xfc00 492; GFX9-NEXT: v_readlane_b32 s33, v40, 2 493; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 494; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 495; GFX9-NEXT: s_mov_b64 exec, s[34:35] 496; GFX9-NEXT: s_waitcnt vmcnt(0) 497; GFX9-NEXT: s_setpc_b64 s[30:31] 498; 499; GFX10-LABEL: test_call_external_void_func_i8_imm: 500; GFX10: ; %bb.0: 501; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 503; GFX10-NEXT: s_or_saveexec_b32 s34, -1 504; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 505; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 506; GFX10-NEXT: s_mov_b32 exec_lo, s34 507; GFX10-NEXT: v_writelane_b32 v40, s33, 2 508; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b 509; GFX10-NEXT: s_mov_b32 s33, s32 510; GFX10-NEXT: s_addk_i32 s32, 0x200 511; GFX10-NEXT: s_getpc_b64 s[34:35] 512; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 513; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 514; GFX10-NEXT: v_writelane_b32 v40, s30, 0 515; GFX10-NEXT: v_writelane_b32 v40, s31, 1 516; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 517; GFX10-NEXT: v_readlane_b32 s31, v40, 1 518; GFX10-NEXT: v_readlane_b32 s30, v40, 0 519; GFX10-NEXT: s_addk_i32 s32, 0xfe00 520; GFX10-NEXT: v_readlane_b32 s33, v40, 2 521; GFX10-NEXT: s_or_saveexec_b32 s34, -1 522; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 523; GFX10-NEXT: s_waitcnt_depctr 0xffe3 524; GFX10-NEXT: s_mov_b32 exec_lo, s34 525; GFX10-NEXT: s_waitcnt vmcnt(0) 526; GFX10-NEXT: s_setpc_b64 s[30:31] 527; 528; GFX11-LABEL: test_call_external_void_func_i8_imm: 529; GFX11: ; %bb.0: 530; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 531; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 532; GFX11-NEXT: s_or_saveexec_b32 s0, -1 533; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 534; GFX11-NEXT: s_mov_b32 exec_lo, s0 535; GFX11-NEXT: v_writelane_b32 v40, s33, 2 536; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b 537; GFX11-NEXT: s_mov_b32 s33, s32 538; GFX11-NEXT: s_add_i32 s32, s32, 16 539; GFX11-NEXT: s_getpc_b64 s[0:1] 540; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4 541; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12 542; GFX11-NEXT: v_writelane_b32 v40, s30, 0 543; GFX11-NEXT: v_writelane_b32 v40, s31, 1 544; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 545; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 546; GFX11-NEXT: v_readlane_b32 s31, v40, 1 547; GFX11-NEXT: v_readlane_b32 s30, v40, 0 548; GFX11-NEXT: s_add_i32 s32, s32, -16 549; 
GFX11-NEXT: v_readlane_b32 s33, v40, 2 550; GFX11-NEXT: s_or_saveexec_b32 s0, -1 551; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 552; GFX11-NEXT: s_mov_b32 exec_lo, s0 553; GFX11-NEXT: s_waitcnt vmcnt(0) 554; GFX11-NEXT: s_setpc_b64 s[30:31] 555; 556; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm: 557; GFX10-SCRATCH: ; %bb.0: 558; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 559; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 560; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 561; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 562; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 563; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 564; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 565; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b 566; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 567; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 568; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 569; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4 570; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12 571; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 572; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 573; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 574; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 575; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 576; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 577; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 578; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 579; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 580; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 581; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 582; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 583; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 584 call amdgpu_gfx void @external_void_func_i8(i8 123) 585 ret void 586} 587 588define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { 589; GFX9-LABEL: test_call_external_void_func_i8_signext: 590; 
GFX9: ; %bb.0: 591; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 592; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 593; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 594; GFX9-NEXT: s_mov_b64 exec, s[34:35] 595; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc 596; GFX9-NEXT: s_waitcnt vmcnt(0) 597; GFX9-NEXT: v_writelane_b32 v40, s33, 2 598; GFX9-NEXT: s_mov_b32 s33, s32 599; GFX9-NEXT: s_addk_i32 s32, 0x400 600; GFX9-NEXT: v_writelane_b32 v40, s30, 0 601; GFX9-NEXT: v_writelane_b32 v40, s31, 1 602; GFX9-NEXT: s_getpc_b64 s[34:35] 603; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 604; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 605; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 606; GFX9-NEXT: v_readlane_b32 s31, v40, 1 607; GFX9-NEXT: v_readlane_b32 s30, v40, 0 608; GFX9-NEXT: s_addk_i32 s32, 0xfc00 609; GFX9-NEXT: v_readlane_b32 s33, v40, 2 610; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 611; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 612; GFX9-NEXT: s_mov_b64 exec, s[34:35] 613; GFX9-NEXT: s_waitcnt vmcnt(0) 614; GFX9-NEXT: s_setpc_b64 s[30:31] 615; 616; GFX10-LABEL: test_call_external_void_func_i8_signext: 617; GFX10: ; %bb.0: 618; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 619; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 620; GFX10-NEXT: s_or_saveexec_b32 s34, -1 621; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 622; GFX10-NEXT: s_waitcnt_depctr 0xffe3 623; GFX10-NEXT: s_mov_b32 exec_lo, s34 624; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc 625; GFX10-NEXT: s_waitcnt vmcnt(0) 626; GFX10-NEXT: v_writelane_b32 v40, s33, 2 627; GFX10-NEXT: s_mov_b32 s33, s32 628; GFX10-NEXT: s_addk_i32 s32, 0x200 629; GFX10-NEXT: s_getpc_b64 s[34:35] 630; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 631; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 632; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 633; GFX10-NEXT: v_writelane_b32 v40, s31, 1 634; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 635; GFX10-NEXT: v_readlane_b32 s31, v40, 1 636; GFX10-NEXT: v_readlane_b32 s30, v40, 0 637; GFX10-NEXT: s_addk_i32 s32, 0xfe00 638; GFX10-NEXT: v_readlane_b32 s33, v40, 2 639; GFX10-NEXT: s_or_saveexec_b32 s34, -1 640; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 641; GFX10-NEXT: s_waitcnt_depctr 0xffe3 642; GFX10-NEXT: s_mov_b32 exec_lo, s34 643; GFX10-NEXT: s_waitcnt vmcnt(0) 644; GFX10-NEXT: s_setpc_b64 s[30:31] 645; 646; GFX11-LABEL: test_call_external_void_func_i8_signext: 647; GFX11: ; %bb.0: 648; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 649; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 650; GFX11-NEXT: s_or_saveexec_b32 s0, -1 651; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 652; GFX11-NEXT: s_mov_b32 exec_lo, s0 653; GFX11-NEXT: global_load_i8 v0, v[0:1], off glc dlc 654; GFX11-NEXT: s_waitcnt vmcnt(0) 655; GFX11-NEXT: v_writelane_b32 v40, s33, 2 656; GFX11-NEXT: s_mov_b32 s33, s32 657; GFX11-NEXT: s_add_i32 s32, s32, 16 658; GFX11-NEXT: s_getpc_b64 s[0:1] 659; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4 660; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12 661; GFX11-NEXT: v_writelane_b32 v40, s30, 0 662; GFX11-NEXT: v_writelane_b32 v40, s31, 1 663; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 664; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 665; GFX11-NEXT: v_readlane_b32 s31, v40, 1 666; GFX11-NEXT: v_readlane_b32 s30, v40, 0 667; GFX11-NEXT: s_add_i32 s32, s32, -16 668; GFX11-NEXT: v_readlane_b32 s33, v40, 2 669; GFX11-NEXT: s_or_saveexec_b32 s0, -1 670; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 671; GFX11-NEXT: s_mov_b32 exec_lo, s0 672; GFX11-NEXT: s_waitcnt vmcnt(0) 673; GFX11-NEXT: s_setpc_b64 s[30:31] 674; 675; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext: 676; GFX10-SCRATCH: ; %bb.0: 
677; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 679; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 680; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 681; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 682; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 683; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc 684; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 685; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 686; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 687; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 688; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 689; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4 690; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12 691; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 692; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 693; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 694; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 695; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 696; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 697; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 698; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 699; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 700; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 701; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 702; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 703; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 704 %var = load volatile i8, i8 addrspace(1)* undef 705 call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var) 706 ret void 707} 708 709define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { 710; GFX9-LABEL: test_call_external_void_func_i8_zeroext: 711; GFX9: ; %bb.0: 712; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 713; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 714; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 715; GFX9-NEXT: 
s_mov_b64 exec, s[34:35] 716; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc 717; GFX9-NEXT: s_waitcnt vmcnt(0) 718; GFX9-NEXT: v_writelane_b32 v40, s33, 2 719; GFX9-NEXT: s_mov_b32 s33, s32 720; GFX9-NEXT: s_addk_i32 s32, 0x400 721; GFX9-NEXT: v_writelane_b32 v40, s30, 0 722; GFX9-NEXT: v_writelane_b32 v40, s31, 1 723; GFX9-NEXT: s_getpc_b64 s[34:35] 724; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 725; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 726; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 727; GFX9-NEXT: v_readlane_b32 s31, v40, 1 728; GFX9-NEXT: v_readlane_b32 s30, v40, 0 729; GFX9-NEXT: s_addk_i32 s32, 0xfc00 730; GFX9-NEXT: v_readlane_b32 s33, v40, 2 731; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 732; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 733; GFX9-NEXT: s_mov_b64 exec, s[34:35] 734; GFX9-NEXT: s_waitcnt vmcnt(0) 735; GFX9-NEXT: s_setpc_b64 s[30:31] 736; 737; GFX10-LABEL: test_call_external_void_func_i8_zeroext: 738; GFX10: ; %bb.0: 739; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 741; GFX10-NEXT: s_or_saveexec_b32 s34, -1 742; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 743; GFX10-NEXT: s_waitcnt_depctr 0xffe3 744; GFX10-NEXT: s_mov_b32 exec_lo, s34 745; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 746; GFX10-NEXT: s_waitcnt vmcnt(0) 747; GFX10-NEXT: v_writelane_b32 v40, s33, 2 748; GFX10-NEXT: s_mov_b32 s33, s32 749; GFX10-NEXT: s_addk_i32 s32, 0x200 750; GFX10-NEXT: s_getpc_b64 s[34:35] 751; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 752; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 753; GFX10-NEXT: v_writelane_b32 v40, s30, 0 754; GFX10-NEXT: v_writelane_b32 v40, s31, 1 755; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 756; GFX10-NEXT: v_readlane_b32 s31, v40, 1 757; GFX10-NEXT: v_readlane_b32 s30, v40, 0 758; 
GFX10-NEXT: s_addk_i32 s32, 0xfe00 759; GFX10-NEXT: v_readlane_b32 s33, v40, 2 760; GFX10-NEXT: s_or_saveexec_b32 s34, -1 761; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 762; GFX10-NEXT: s_waitcnt_depctr 0xffe3 763; GFX10-NEXT: s_mov_b32 exec_lo, s34 764; GFX10-NEXT: s_waitcnt vmcnt(0) 765; GFX10-NEXT: s_setpc_b64 s[30:31] 766; 767; GFX11-LABEL: test_call_external_void_func_i8_zeroext: 768; GFX11: ; %bb.0: 769; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 770; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 771; GFX11-NEXT: s_or_saveexec_b32 s0, -1 772; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 773; GFX11-NEXT: s_mov_b32 exec_lo, s0 774; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 775; GFX11-NEXT: s_waitcnt vmcnt(0) 776; GFX11-NEXT: v_writelane_b32 v40, s33, 2 777; GFX11-NEXT: s_mov_b32 s33, s32 778; GFX11-NEXT: s_add_i32 s32, s32, 16 779; GFX11-NEXT: s_getpc_b64 s[0:1] 780; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4 781; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12 782; GFX11-NEXT: v_writelane_b32 v40, s30, 0 783; GFX11-NEXT: v_writelane_b32 v40, s31, 1 784; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 785; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 786; GFX11-NEXT: v_readlane_b32 s31, v40, 1 787; GFX11-NEXT: v_readlane_b32 s30, v40, 0 788; GFX11-NEXT: s_add_i32 s32, s32, -16 789; GFX11-NEXT: v_readlane_b32 s33, v40, 2 790; GFX11-NEXT: s_or_saveexec_b32 s0, -1 791; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 792; GFX11-NEXT: s_mov_b32 exec_lo, s0 793; GFX11-NEXT: s_waitcnt vmcnt(0) 794; GFX11-NEXT: s_setpc_b64 s[30:31] 795; 796; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext: 797; GFX10-SCRATCH: ; %bb.0: 798; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 799; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 800; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 801; GFX10-SCRATCH-NEXT: scratch_store_dword off, 
v40, s32 ; 4-byte Folded Spill 802; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 803; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 804; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 805; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 806; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 807; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 808; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 809; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 810; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4 811; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12 812; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 813; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 814; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 815; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 816; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 817; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 818; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 819; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 820; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 821; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 822; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 823; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 824; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 825 %var = load volatile i8, i8 addrspace(1)* undef 826 call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var) 827 ret void 828} 829 830define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { 831; GFX9-LABEL: test_call_external_void_func_i16_imm: 832; GFX9: ; %bb.0: 833; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 834; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 835; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 836; GFX9-NEXT: s_mov_b64 exec, s[34:35] 837; GFX9-NEXT: v_writelane_b32 v40, s33, 2 838; GFX9-NEXT: s_mov_b32 s33, s32 839; GFX9-NEXT: s_addk_i32 s32, 0x400 840; GFX9-NEXT: v_writelane_b32 v40, s30, 0 841; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 
842; GFX9-NEXT: v_writelane_b32 v40, s31, 1 843; GFX9-NEXT: s_getpc_b64 s[34:35] 844; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 845; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 846; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 847; GFX9-NEXT: v_readlane_b32 s31, v40, 1 848; GFX9-NEXT: v_readlane_b32 s30, v40, 0 849; GFX9-NEXT: s_addk_i32 s32, 0xfc00 850; GFX9-NEXT: v_readlane_b32 s33, v40, 2 851; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 852; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 853; GFX9-NEXT: s_mov_b64 exec, s[34:35] 854; GFX9-NEXT: s_waitcnt vmcnt(0) 855; GFX9-NEXT: s_setpc_b64 s[30:31] 856; 857; GFX10-LABEL: test_call_external_void_func_i16_imm: 858; GFX10: ; %bb.0: 859; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 860; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 861; GFX10-NEXT: s_or_saveexec_b32 s34, -1 862; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 863; GFX10-NEXT: s_waitcnt_depctr 0xffe3 864; GFX10-NEXT: s_mov_b32 exec_lo, s34 865; GFX10-NEXT: v_writelane_b32 v40, s33, 2 866; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b 867; GFX10-NEXT: s_mov_b32 s33, s32 868; GFX10-NEXT: s_addk_i32 s32, 0x200 869; GFX10-NEXT: s_getpc_b64 s[34:35] 870; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 871; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 872; GFX10-NEXT: v_writelane_b32 v40, s30, 0 873; GFX10-NEXT: v_writelane_b32 v40, s31, 1 874; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 875; GFX10-NEXT: v_readlane_b32 s31, v40, 1 876; GFX10-NEXT: v_readlane_b32 s30, v40, 0 877; GFX10-NEXT: s_addk_i32 s32, 0xfe00 878; GFX10-NEXT: v_readlane_b32 s33, v40, 2 879; GFX10-NEXT: s_or_saveexec_b32 s34, -1 880; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 881; GFX10-NEXT: s_waitcnt_depctr 0xffe3 882; GFX10-NEXT: s_mov_b32 exec_lo, s34 883; GFX10-NEXT: s_waitcnt vmcnt(0) 884; GFX10-NEXT: s_setpc_b64 s[30:31] 885; 
886; GFX11-LABEL: test_call_external_void_func_i16_imm: 887; GFX11: ; %bb.0: 888; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 889; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 890; GFX11-NEXT: s_or_saveexec_b32 s0, -1 891; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 892; GFX11-NEXT: s_mov_b32 exec_lo, s0 893; GFX11-NEXT: v_writelane_b32 v40, s33, 2 894; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b 895; GFX11-NEXT: s_mov_b32 s33, s32 896; GFX11-NEXT: s_add_i32 s32, s32, 16 897; GFX11-NEXT: s_getpc_b64 s[0:1] 898; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4 899; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12 900; GFX11-NEXT: v_writelane_b32 v40, s30, 0 901; GFX11-NEXT: v_writelane_b32 v40, s31, 1 902; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 903; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 904; GFX11-NEXT: v_readlane_b32 s31, v40, 1 905; GFX11-NEXT: v_readlane_b32 s30, v40, 0 906; GFX11-NEXT: s_add_i32 s32, s32, -16 907; GFX11-NEXT: v_readlane_b32 s33, v40, 2 908; GFX11-NEXT: s_or_saveexec_b32 s0, -1 909; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 910; GFX11-NEXT: s_mov_b32 exec_lo, s0 911; GFX11-NEXT: s_waitcnt vmcnt(0) 912; GFX11-NEXT: s_setpc_b64 s[30:31] 913; 914; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm: 915; GFX10-SCRATCH: ; %bb.0: 916; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 917; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 918; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 919; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 920; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 921; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 922; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 923; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b 924; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 925; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 926; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 927; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, 
external_void_func_i16@rel32@lo+4 928; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12 929; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 930; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 931; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 932; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 933; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 934; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 935; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 936; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 937; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 938; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 939; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 940; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 941; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 942 call amdgpu_gfx void @external_void_func_i16(i16 123) 943 ret void 944} 945 946define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { 947; GFX9-LABEL: test_call_external_void_func_i16_signext: 948; GFX9: ; %bb.0: 949; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 951; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 952; GFX9-NEXT: s_mov_b64 exec, s[34:35] 953; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc 954; GFX9-NEXT: s_waitcnt vmcnt(0) 955; GFX9-NEXT: v_writelane_b32 v40, s33, 2 956; GFX9-NEXT: s_mov_b32 s33, s32 957; GFX9-NEXT: s_addk_i32 s32, 0x400 958; GFX9-NEXT: v_writelane_b32 v40, s30, 0 959; GFX9-NEXT: v_writelane_b32 v40, s31, 1 960; GFX9-NEXT: s_getpc_b64 s[34:35] 961; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 962; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 963; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 964; GFX9-NEXT: v_readlane_b32 s31, v40, 1 965; GFX9-NEXT: v_readlane_b32 s30, v40, 0 966; GFX9-NEXT: s_addk_i32 s32, 0xfc00 967; GFX9-NEXT: v_readlane_b32 s33, v40, 2 968; GFX9-NEXT: s_or_saveexec_b64 
s[34:35], -1 969; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 970; GFX9-NEXT: s_mov_b64 exec, s[34:35] 971; GFX9-NEXT: s_waitcnt vmcnt(0) 972; GFX9-NEXT: s_setpc_b64 s[30:31] 973; 974; GFX10-LABEL: test_call_external_void_func_i16_signext: 975; GFX10: ; %bb.0: 976; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 978; GFX10-NEXT: s_or_saveexec_b32 s34, -1 979; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 980; GFX10-NEXT: s_waitcnt_depctr 0xffe3 981; GFX10-NEXT: s_mov_b32 exec_lo, s34 982; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc 983; GFX10-NEXT: s_waitcnt vmcnt(0) 984; GFX10-NEXT: v_writelane_b32 v40, s33, 2 985; GFX10-NEXT: s_mov_b32 s33, s32 986; GFX10-NEXT: s_addk_i32 s32, 0x200 987; GFX10-NEXT: s_getpc_b64 s[34:35] 988; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 989; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 990; GFX10-NEXT: v_writelane_b32 v40, s30, 0 991; GFX10-NEXT: v_writelane_b32 v40, s31, 1 992; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 993; GFX10-NEXT: v_readlane_b32 s31, v40, 1 994; GFX10-NEXT: v_readlane_b32 s30, v40, 0 995; GFX10-NEXT: s_addk_i32 s32, 0xfe00 996; GFX10-NEXT: v_readlane_b32 s33, v40, 2 997; GFX10-NEXT: s_or_saveexec_b32 s34, -1 998; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 999; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1000; GFX10-NEXT: s_mov_b32 exec_lo, s34 1001; GFX10-NEXT: s_waitcnt vmcnt(0) 1002; GFX10-NEXT: s_setpc_b64 s[30:31] 1003; 1004; GFX11-LABEL: test_call_external_void_func_i16_signext: 1005; GFX11: ; %bb.0: 1006; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1007; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1008; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1009; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1010; GFX11-NEXT: s_mov_b32 exec_lo, s0 1011; GFX11-NEXT: global_load_u16 v0, v[0:1], off 
glc dlc 1012; GFX11-NEXT: s_waitcnt vmcnt(0) 1013; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1014; GFX11-NEXT: s_mov_b32 s33, s32 1015; GFX11-NEXT: s_add_i32 s32, s32, 16 1016; GFX11-NEXT: s_getpc_b64 s[0:1] 1017; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4 1018; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12 1019; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1020; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1021; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1022; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1023; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1024; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1025; GFX11-NEXT: s_add_i32 s32, s32, -16 1026; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1027; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1028; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1029; GFX11-NEXT: s_mov_b32 exec_lo, s0 1030; GFX11-NEXT: s_waitcnt vmcnt(0) 1031; GFX11-NEXT: s_setpc_b64 s[30:31] 1032; 1033; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext: 1034; GFX10-SCRATCH: ; %bb.0: 1035; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1036; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1037; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1038; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1039; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1040; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1041; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc 1042; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1043; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1044; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1045; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1046; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1047; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4 1048; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12 1049; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1050; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s31, 1 1051; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1052; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1053; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1054; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1055; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1056; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1057; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1058; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1059; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1060; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1061; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1062 %var = load volatile i16, i16 addrspace(1)* undef 1063 call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var) 1064 ret void 1065} 1066 1067define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { 1068; GFX9-LABEL: test_call_external_void_func_i16_zeroext: 1069; GFX9: ; %bb.0: 1070; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1071; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1072; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1073; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1074; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc 1075; GFX9-NEXT: s_waitcnt vmcnt(0) 1076; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1077; GFX9-NEXT: s_mov_b32 s33, s32 1078; GFX9-NEXT: s_addk_i32 s32, 0x400 1079; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1080; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1081; GFX9-NEXT: s_getpc_b64 s[34:35] 1082; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 1083; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 1084; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1085; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1086; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1087; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1088; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1089; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1090; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1091; 
GFX9-NEXT: s_mov_b64 exec, s[34:35] 1092; GFX9-NEXT: s_waitcnt vmcnt(0) 1093; GFX9-NEXT: s_setpc_b64 s[30:31] 1094; 1095; GFX10-LABEL: test_call_external_void_func_i16_zeroext: 1096; GFX10: ; %bb.0: 1097; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1098; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1099; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1100; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1101; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1102; GFX10-NEXT: s_mov_b32 exec_lo, s34 1103; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc 1104; GFX10-NEXT: s_waitcnt vmcnt(0) 1105; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1106; GFX10-NEXT: s_mov_b32 s33, s32 1107; GFX10-NEXT: s_addk_i32 s32, 0x200 1108; GFX10-NEXT: s_getpc_b64 s[34:35] 1109; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 1110; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 1111; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1112; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1113; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1114; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1115; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1116; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1117; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1118; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1119; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1120; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1121; GFX10-NEXT: s_mov_b32 exec_lo, s34 1122; GFX10-NEXT: s_waitcnt vmcnt(0) 1123; GFX10-NEXT: s_setpc_b64 s[30:31] 1124; 1125; GFX11-LABEL: test_call_external_void_func_i16_zeroext: 1126; GFX11: ; %bb.0: 1127; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1129; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1130; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1131; GFX11-NEXT: s_mov_b32 exec_lo, s0 1132; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc 1133; GFX11-NEXT: s_waitcnt vmcnt(0) 1134; GFX11-NEXT: 
v_writelane_b32 v40, s33, 2 1135; GFX11-NEXT: s_mov_b32 s33, s32 1136; GFX11-NEXT: s_add_i32 s32, s32, 16 1137; GFX11-NEXT: s_getpc_b64 s[0:1] 1138; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4 1139; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12 1140; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1141; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1142; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1143; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1144; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1145; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1146; GFX11-NEXT: s_add_i32 s32, s32, -16 1147; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1148; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1149; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1150; GFX11-NEXT: s_mov_b32 exec_lo, s0 1151; GFX11-NEXT: s_waitcnt vmcnt(0) 1152; GFX11-NEXT: s_setpc_b64 s[30:31] 1153; 1154; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext: 1155; GFX10-SCRATCH: ; %bb.0: 1156; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1157; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1158; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1159; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1160; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1161; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1162; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc 1163; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1164; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1165; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1166; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1167; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1168; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4 1169; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12 1170; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1171; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1172; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 
1173; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1174; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1175; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1176; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1177; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1178; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1179; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1180; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1181; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1182; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1183 %var = load volatile i16, i16 addrspace(1)* undef 1184 call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var) 1185 ret void 1186} 1187 1188define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { 1189; GFX9-LABEL: test_call_external_void_func_i32_imm: 1190; GFX9: ; %bb.0: 1191; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1192; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1193; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1194; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1195; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1196; GFX9-NEXT: s_mov_b32 s33, s32 1197; GFX9-NEXT: s_addk_i32 s32, 0x400 1198; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1199; GFX9-NEXT: v_mov_b32_e32 v0, 42 1200; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1201; GFX9-NEXT: s_getpc_b64 s[34:35] 1202; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 1203; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 1204; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1205; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1206; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1207; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1208; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1209; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1210; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1211; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1212; GFX9-NEXT: s_waitcnt vmcnt(0) 1213; GFX9-NEXT: s_setpc_b64 s[30:31] 1214; 1215; GFX10-LABEL: 
test_call_external_void_func_i32_imm: 1216; GFX10: ; %bb.0: 1217; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1218; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1219; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1220; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1221; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1222; GFX10-NEXT: s_mov_b32 exec_lo, s34 1223; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1224; GFX10-NEXT: v_mov_b32_e32 v0, 42 1225; GFX10-NEXT: s_mov_b32 s33, s32 1226; GFX10-NEXT: s_addk_i32 s32, 0x200 1227; GFX10-NEXT: s_getpc_b64 s[34:35] 1228; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 1229; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 1230; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1231; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1232; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1233; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1234; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1235; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1236; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1237; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1238; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1239; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1240; GFX10-NEXT: s_mov_b32 exec_lo, s34 1241; GFX10-NEXT: s_waitcnt vmcnt(0) 1242; GFX10-NEXT: s_setpc_b64 s[30:31] 1243; 1244; GFX11-LABEL: test_call_external_void_func_i32_imm: 1245; GFX11: ; %bb.0: 1246; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1247; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1248; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1249; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1250; GFX11-NEXT: s_mov_b32 exec_lo, s0 1251; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1252; GFX11-NEXT: v_mov_b32_e32 v0, 42 1253; GFX11-NEXT: s_mov_b32 s33, s32 1254; GFX11-NEXT: s_add_i32 s32, s32, 16 1255; GFX11-NEXT: s_getpc_b64 s[0:1] 1256; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4 1257; GFX11-NEXT: s_addc_u32 s1, s1, 
external_void_func_i32@rel32@hi+12 1258; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1259; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1260; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1261; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1262; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1263; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1264; GFX11-NEXT: s_add_i32 s32, s32, -16 1265; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1266; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1267; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1268; GFX11-NEXT: s_mov_b32 exec_lo, s0 1269; GFX11-NEXT: s_waitcnt vmcnt(0) 1270; GFX11-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm: 1273; GFX10-SCRATCH: ; %bb.0: 1274; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1276; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1277; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1278; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1279; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1280; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1281; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 1282; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1283; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1284; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1285; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4 1286; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12 1287; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1288; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1289; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1290; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1291; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1292; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1293; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1294; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1295; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1296; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1297; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1298; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1299; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1300 call amdgpu_gfx void @external_void_func_i32(i32 42) 1301 ret void 1302} 1303 1304define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { 1305; GFX9-LABEL: test_call_external_void_func_i64_imm: 1306; GFX9: ; %bb.0: 1307; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1308; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1309; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1310; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1311; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1312; GFX9-NEXT: s_mov_b32 s33, s32 1313; GFX9-NEXT: s_addk_i32 s32, 0x400 1314; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1315; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 1316; GFX9-NEXT: v_mov_b32_e32 v1, 0 1317; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1318; GFX9-NEXT: s_getpc_b64 s[34:35] 1319; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 1320; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 1321; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1322; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1323; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1324; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1325; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1326; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1327; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1328; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1329; GFX9-NEXT: s_waitcnt vmcnt(0) 1330; GFX9-NEXT: s_setpc_b64 s[30:31] 1331; 1332; GFX10-LABEL: test_call_external_void_func_i64_imm: 1333; GFX10: ; %bb.0: 1334; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1335; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1336; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1337; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1338; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1339; GFX10-NEXT: s_mov_b32 exec_lo, s34 1340; GFX10-NEXT: 
v_writelane_b32 v40, s33, 2 1341; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b 1342; GFX10-NEXT: v_mov_b32_e32 v1, 0 1343; GFX10-NEXT: s_mov_b32 s33, s32 1344; GFX10-NEXT: s_addk_i32 s32, 0x200 1345; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1346; GFX10-NEXT: s_getpc_b64 s[34:35] 1347; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 1348; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 1349; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1350; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1351; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1352; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1353; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1354; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1355; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1356; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1357; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1358; GFX10-NEXT: s_mov_b32 exec_lo, s34 1359; GFX10-NEXT: s_waitcnt vmcnt(0) 1360; GFX10-NEXT: s_setpc_b64 s[30:31] 1361; 1362; GFX11-LABEL: test_call_external_void_func_i64_imm: 1363; GFX11: ; %bb.0: 1364; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1365; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1366; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1367; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1368; GFX11-NEXT: s_mov_b32 exec_lo, s0 1369; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1370; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 1371; GFX11-NEXT: s_mov_b32 s33, s32 1372; GFX11-NEXT: s_add_i32 s32, s32, 16 1373; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1374; GFX11-NEXT: s_getpc_b64 s[0:1] 1375; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4 1376; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 1377; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1378; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1379; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1380; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1381; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1382; GFX11-NEXT: s_add_i32 s32, s32, 
-16 1383; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1384; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1385; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1386; GFX11-NEXT: s_mov_b32 exec_lo, s0 1387; GFX11-NEXT: s_waitcnt vmcnt(0) 1388; GFX11-NEXT: s_setpc_b64 s[30:31] 1389; 1390; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm: 1391; GFX10-SCRATCH: ; %bb.0: 1392; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1393; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1394; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1395; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1396; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1397; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1398; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1399; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b 1400; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 1401; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1402; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1403; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1404; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1405; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4 1406; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 1407; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1408; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1409; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1410; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1411; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1412; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1413; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1414; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1415; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1416; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1417; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1418; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1419 call amdgpu_gfx void @external_void_func_i64(i64 123) 1420 ret void 1421} 1422 1423define amdgpu_gfx void 
@test_call_external_void_func_v2i64() #0 { 1424; GFX9-LABEL: test_call_external_void_func_v2i64: 1425; GFX9: ; %bb.0: 1426; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1427; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1428; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1429; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1430; GFX9-NEXT: v_mov_b32_e32 v0, 0 1431; GFX9-NEXT: v_mov_b32_e32 v1, 0 1432; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1433; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1434; GFX9-NEXT: s_mov_b32 s33, s32 1435; GFX9-NEXT: s_addk_i32 s32, 0x400 1436; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1437; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1438; GFX9-NEXT: s_getpc_b64 s[34:35] 1439; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 1440; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 1441; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1442; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1443; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1444; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1445; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1446; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1447; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1448; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1449; GFX9-NEXT: s_waitcnt vmcnt(0) 1450; GFX9-NEXT: s_setpc_b64 s[30:31] 1451; 1452; GFX10-LABEL: test_call_external_void_func_v2i64: 1453; GFX10: ; %bb.0: 1454; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1455; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1456; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1457; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1458; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1459; GFX10-NEXT: s_mov_b32 exec_lo, s34 1460; GFX10-NEXT: v_mov_b32_e32 v0, 0 1461; GFX10-NEXT: v_mov_b32_e32 v1, 0 1462; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1463; GFX10-NEXT: s_mov_b32 s33, s32 1464; GFX10-NEXT: s_addk_i32 s32, 0x200 1465; GFX10-NEXT: s_getpc_b64 s[34:35] 1466; GFX10-NEXT: s_add_u32 
s34, s34, external_void_func_v2i64@rel32@lo+4 1467; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 1468; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1469; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1470; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1471; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1472; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1473; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1474; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1475; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1476; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1477; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1478; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1479; GFX10-NEXT: s_mov_b32 exec_lo, s34 1480; GFX10-NEXT: s_waitcnt vmcnt(0) 1481; GFX10-NEXT: s_setpc_b64 s[30:31] 1482; 1483; GFX11-LABEL: test_call_external_void_func_v2i64: 1484; GFX11: ; %bb.0: 1485; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1486; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1487; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1488; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1489; GFX11-NEXT: s_mov_b32 exec_lo, s0 1490; GFX11-NEXT: v_mov_b32_e32 v0, 0 1491; GFX11-NEXT: v_mov_b32_e32 v1, 0 1492; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1493; GFX11-NEXT: s_mov_b32 s33, s32 1494; GFX11-NEXT: s_add_i32 s32, s32, 16 1495; GFX11-NEXT: s_getpc_b64 s[0:1] 1496; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 1497; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 1498; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 1499; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1500; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1501; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1502; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1503; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1504; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1505; GFX11-NEXT: s_add_i32 s32, s32, -16 1506; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1507; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1508; GFX11-NEXT: 
scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1509; GFX11-NEXT: s_mov_b32 exec_lo, s0 1510; GFX11-NEXT: s_waitcnt vmcnt(0) 1511; GFX11-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64: 1514; GFX10-SCRATCH: ; %bb.0: 1515; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1516; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1517; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1518; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1519; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1520; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1521; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 1522; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 1523; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1524; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1525; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1526; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1527; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 1528; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 1529; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1530; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1531; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1532; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1533; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1534; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1535; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1536; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1537; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1538; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1539; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1540; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1541; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1542; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1543 %val = load <2 x i64>, <2 x i64> addrspace(1)* null 1544 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val) 1545 ret void 1546} 1547 1548define amdgpu_gfx 
void @test_call_external_void_func_v2i64_imm() #0 { 1549; GFX9-LABEL: test_call_external_void_func_v2i64_imm: 1550; GFX9: ; %bb.0: 1551; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1552; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1553; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1554; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1555; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1556; GFX9-NEXT: s_mov_b32 s33, s32 1557; GFX9-NEXT: s_addk_i32 s32, 0x400 1558; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1559; GFX9-NEXT: v_mov_b32_e32 v0, 1 1560; GFX9-NEXT: v_mov_b32_e32 v1, 2 1561; GFX9-NEXT: v_mov_b32_e32 v2, 3 1562; GFX9-NEXT: v_mov_b32_e32 v3, 4 1563; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1564; GFX9-NEXT: s_getpc_b64 s[34:35] 1565; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 1566; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 1567; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1568; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1569; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1570; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1571; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1572; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1573; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1574; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1575; GFX9-NEXT: s_waitcnt vmcnt(0) 1576; GFX9-NEXT: s_setpc_b64 s[30:31] 1577; 1578; GFX10-LABEL: test_call_external_void_func_v2i64_imm: 1579; GFX10: ; %bb.0: 1580; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1581; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1582; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1583; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1584; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1585; GFX10-NEXT: s_mov_b32 exec_lo, s34 1586; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1587; GFX10-NEXT: v_mov_b32_e32 v0, 1 1588; GFX10-NEXT: v_mov_b32_e32 v1, 2 1589; GFX10-NEXT: v_mov_b32_e32 v2, 3 1590; GFX10-NEXT: v_mov_b32_e32 v3, 4 1591; GFX10-NEXT: v_writelane_b32 
v40, s30, 0 1592; GFX10-NEXT: s_mov_b32 s33, s32 1593; GFX10-NEXT: s_addk_i32 s32, 0x200 1594; GFX10-NEXT: s_getpc_b64 s[34:35] 1595; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 1596; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 1597; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1598; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1599; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1600; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1601; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1602; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1603; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1604; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1605; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1606; GFX10-NEXT: s_mov_b32 exec_lo, s34 1607; GFX10-NEXT: s_waitcnt vmcnt(0) 1608; GFX10-NEXT: s_setpc_b64 s[30:31] 1609; 1610; GFX11-LABEL: test_call_external_void_func_v2i64_imm: 1611; GFX11: ; %bb.0: 1612; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1613; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1614; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1615; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1616; GFX11-NEXT: s_mov_b32 exec_lo, s0 1617; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1618; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 1619; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 1620; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1621; GFX11-NEXT: s_mov_b32 s33, s32 1622; GFX11-NEXT: s_add_i32 s32, s32, 16 1623; GFX11-NEXT: s_getpc_b64 s[0:1] 1624; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 1625; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 1626; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1627; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1628; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1629; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1630; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1631; GFX11-NEXT: s_add_i32 s32, s32, -16 1632; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1633; GFX11-NEXT: 
s_or_saveexec_b32 s0, -1 1634; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1635; GFX11-NEXT: s_mov_b32 exec_lo, s0 1636; GFX11-NEXT: s_waitcnt vmcnt(0) 1637; GFX11-NEXT: s_setpc_b64 s[30:31] 1638; 1639; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm: 1640; GFX10-SCRATCH: ; %bb.0: 1641; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1642; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1643; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1644; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1645; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1646; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1647; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1648; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 1649; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 1650; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 1651; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 1652; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1653; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1654; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1655; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1656; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 1657; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 1658; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1659; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1660; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1661; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1662; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1663; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1664; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1665; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1666; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1667; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1668; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1669; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1670 call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) 
1671 ret void 1672} 1673 1674define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { 1675; GFX9-LABEL: test_call_external_void_func_v3i64: 1676; GFX9: ; %bb.0: 1677; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1678; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1679; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1680; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1681; GFX9-NEXT: v_mov_b32_e32 v0, 0 1682; GFX9-NEXT: v_mov_b32_e32 v1, 0 1683; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1684; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1685; GFX9-NEXT: s_mov_b32 s33, s32 1686; GFX9-NEXT: s_addk_i32 s32, 0x400 1687; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1688; GFX9-NEXT: v_mov_b32_e32 v4, 1 1689; GFX9-NEXT: v_mov_b32_e32 v5, 2 1690; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1691; GFX9-NEXT: s_getpc_b64 s[34:35] 1692; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 1693; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 1694; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1695; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1696; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1697; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1698; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1699; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1700; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1701; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1702; GFX9-NEXT: s_waitcnt vmcnt(0) 1703; GFX9-NEXT: s_setpc_b64 s[30:31] 1704; 1705; GFX10-LABEL: test_call_external_void_func_v3i64: 1706; GFX10: ; %bb.0: 1707; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1708; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1709; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1710; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1711; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1712; GFX10-NEXT: s_mov_b32 exec_lo, s34 1713; GFX10-NEXT: v_mov_b32_e32 v0, 0 1714; GFX10-NEXT: v_mov_b32_e32 v1, 0 1715; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1716; GFX10-NEXT: 
v_mov_b32_e32 v4, 1 1717; GFX10-NEXT: v_mov_b32_e32 v5, 2 1718; GFX10-NEXT: s_mov_b32 s33, s32 1719; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1720; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1721; GFX10-NEXT: s_addk_i32 s32, 0x200 1722; GFX10-NEXT: s_getpc_b64 s[34:35] 1723; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 1724; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 1725; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1726; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1727; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1728; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1729; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1730; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1731; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1732; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1733; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1734; GFX10-NEXT: s_mov_b32 exec_lo, s34 1735; GFX10-NEXT: s_waitcnt vmcnt(0) 1736; GFX10-NEXT: s_setpc_b64 s[30:31] 1737; 1738; GFX11-LABEL: test_call_external_void_func_v3i64: 1739; GFX11: ; %bb.0: 1740; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1742; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1743; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1744; GFX11-NEXT: s_mov_b32 exec_lo, s0 1745; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 1746; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 1747; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1748; GFX11-NEXT: s_mov_b32 s33, s32 1749; GFX11-NEXT: s_add_i32 s32, s32, 16 1750; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 1751; GFX11-NEXT: s_getpc_b64 s[0:1] 1752; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4 1753; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 1754; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1755; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1756; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1757; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 1758; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1759; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1760; GFX11-NEXT: s_add_i32 s32, s32, -16 1761; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1762; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1763; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1764; GFX11-NEXT: s_mov_b32 exec_lo, s0 1765; GFX11-NEXT: s_waitcnt vmcnt(0) 1766; GFX11-NEXT: s_setpc_b64 s[30:31] 1767; 1768; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64: 1769; GFX10-SCRATCH: ; %bb.0: 1770; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1771; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1772; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1773; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1774; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1775; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1776; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 1777; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 1778; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1779; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 1780; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 1781; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 1782; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1783; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1784; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1785; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1786; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4 1787; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 1788; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1789; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1790; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1791; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1792; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1793; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1794; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1795; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1796; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1797; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1798; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1799; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1800 %load = load <2 x i64>, <2 x i64> addrspace(1)* null 1801 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> 1802 1803 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val) 1804 ret void 1805} 1806 1807define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { 1808; GFX9-LABEL: test_call_external_void_func_v4i64: 1809; GFX9: ; %bb.0: 1810; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1811; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1812; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1813; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1814; GFX9-NEXT: v_mov_b32_e32 v0, 0 1815; GFX9-NEXT: v_mov_b32_e32 v1, 0 1816; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1817; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1818; GFX9-NEXT: s_mov_b32 s33, s32 1819; GFX9-NEXT: s_addk_i32 s32, 0x400 1820; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1821; GFX9-NEXT: v_mov_b32_e32 v4, 1 1822; GFX9-NEXT: v_mov_b32_e32 v5, 2 1823; GFX9-NEXT: v_mov_b32_e32 v6, 3 1824; GFX9-NEXT: v_mov_b32_e32 v7, 4 1825; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1826; GFX9-NEXT: s_getpc_b64 s[34:35] 1827; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 1828; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 1829; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1830; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1831; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1832; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1833; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1834; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1835; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1836; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1837; GFX9-NEXT: s_waitcnt vmcnt(0) 1838; GFX9-NEXT: s_setpc_b64 s[30:31] 1839; 1840; GFX10-LABEL: 
test_call_external_void_func_v4i64: 1841; GFX10: ; %bb.0: 1842; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1843; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1844; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1845; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1846; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1847; GFX10-NEXT: s_mov_b32 exec_lo, s34 1848; GFX10-NEXT: v_mov_b32_e32 v0, 0 1849; GFX10-NEXT: v_mov_b32_e32 v1, 0 1850; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1851; GFX10-NEXT: v_mov_b32_e32 v4, 1 1852; GFX10-NEXT: v_mov_b32_e32 v5, 2 1853; GFX10-NEXT: v_mov_b32_e32 v6, 3 1854; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1855; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1856; GFX10-NEXT: v_mov_b32_e32 v7, 4 1857; GFX10-NEXT: s_mov_b32 s33, s32 1858; GFX10-NEXT: s_addk_i32 s32, 0x200 1859; GFX10-NEXT: s_getpc_b64 s[34:35] 1860; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 1861; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 1862; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1863; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1864; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1865; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1866; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1867; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1868; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1869; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1870; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1871; GFX10-NEXT: s_mov_b32 exec_lo, s34 1872; GFX10-NEXT: s_waitcnt vmcnt(0) 1873; GFX10-NEXT: s_setpc_b64 s[30:31] 1874; 1875; GFX11-LABEL: test_call_external_void_func_v4i64: 1876; GFX11: ; %bb.0: 1877; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1878; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1879; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1880; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 1881; GFX11-NEXT: s_mov_b32 exec_lo, s0 1882; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 1883; GFX11-NEXT: 
v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 1884; GFX11-NEXT: v_writelane_b32 v40, s33, 2 1885; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 1886; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 1887; GFX11-NEXT: s_mov_b32 s33, s32 1888; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1889; GFX11-NEXT: s_add_i32 s32, s32, 16 1890; GFX11-NEXT: s_getpc_b64 s[0:1] 1891; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4 1892; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 1893; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1894; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 1895; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1896; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1897; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1898; GFX11-NEXT: s_add_i32 s32, s32, -16 1899; GFX11-NEXT: v_readlane_b32 s33, v40, 2 1900; GFX11-NEXT: s_or_saveexec_b32 s0, -1 1901; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 1902; GFX11-NEXT: s_mov_b32 exec_lo, s0 1903; GFX11-NEXT: s_waitcnt vmcnt(0) 1904; GFX11-NEXT: s_setpc_b64 s[30:31] 1905; 1906; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64: 1907; GFX10-SCRATCH: ; %bb.0: 1908; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1909; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 1910; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1911; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 1912; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1913; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1914; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 1915; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 1916; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 1917; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 1918; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 1919; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3 1920; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 1921; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 1922; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 1923; GFX10-SCRATCH-NEXT: 
s_mov_b32 s33, s32 1924; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 1925; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 1926; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4 1927; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 1928; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 1929; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 1930; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 1931; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 1932; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 1933; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 1934; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 1935; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 1936; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 1937; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 1938; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 1939; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 1940 %load = load <2 x i64>, <2 x i64> addrspace(1)* null 1941 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1942 call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val) 1943 ret void 1944} 1945 1946define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { 1947; GFX9-LABEL: test_call_external_void_func_f16_imm: 1948; GFX9: ; %bb.0: 1949; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1950; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1951; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1952; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1953; GFX9-NEXT: v_writelane_b32 v40, s33, 2 1954; GFX9-NEXT: s_mov_b32 s33, s32 1955; GFX9-NEXT: s_addk_i32 s32, 0x400 1956; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1957; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 1958; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1959; GFX9-NEXT: s_getpc_b64 s[34:35] 1960; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 1961; GFX9-NEXT: s_addc_u32 s35, s35, 
external_void_func_f16@rel32@hi+12 1962; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 1963; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1964; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1965; GFX9-NEXT: s_addk_i32 s32, 0xfc00 1966; GFX9-NEXT: v_readlane_b32 s33, v40, 2 1967; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 1968; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1969; GFX9-NEXT: s_mov_b64 exec, s[34:35] 1970; GFX9-NEXT: s_waitcnt vmcnt(0) 1971; GFX9-NEXT: s_setpc_b64 s[30:31] 1972; 1973; GFX10-LABEL: test_call_external_void_func_f16_imm: 1974; GFX10: ; %bb.0: 1975; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1976; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1977; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1978; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1979; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1980; GFX10-NEXT: s_mov_b32 exec_lo, s34 1981; GFX10-NEXT: v_writelane_b32 v40, s33, 2 1982; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 1983; GFX10-NEXT: s_mov_b32 s33, s32 1984; GFX10-NEXT: s_addk_i32 s32, 0x200 1985; GFX10-NEXT: s_getpc_b64 s[34:35] 1986; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 1987; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 1988; GFX10-NEXT: v_writelane_b32 v40, s30, 0 1989; GFX10-NEXT: v_writelane_b32 v40, s31, 1 1990; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 1991; GFX10-NEXT: v_readlane_b32 s31, v40, 1 1992; GFX10-NEXT: v_readlane_b32 s30, v40, 0 1993; GFX10-NEXT: s_addk_i32 s32, 0xfe00 1994; GFX10-NEXT: v_readlane_b32 s33, v40, 2 1995; GFX10-NEXT: s_or_saveexec_b32 s34, -1 1996; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 1997; GFX10-NEXT: s_waitcnt_depctr 0xffe3 1998; GFX10-NEXT: s_mov_b32 exec_lo, s34 1999; GFX10-NEXT: s_waitcnt vmcnt(0) 2000; GFX10-NEXT: s_setpc_b64 s[30:31] 2001; 2002; GFX11-LABEL: test_call_external_void_func_f16_imm: 2003; GFX11: ; %bb.0: 2004; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2005; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2006; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2007; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2008; GFX11-NEXT: s_mov_b32 exec_lo, s0 2009; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2010; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 2011; GFX11-NEXT: s_mov_b32 s33, s32 2012; GFX11-NEXT: s_add_i32 s32, s32, 16 2013; GFX11-NEXT: s_getpc_b64 s[0:1] 2014; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4 2015; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12 2016; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2017; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2018; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2019; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2020; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2021; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2022; GFX11-NEXT: s_add_i32 s32, s32, -16 2023; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2024; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2025; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2026; GFX11-NEXT: s_mov_b32 exec_lo, s0 2027; GFX11-NEXT: s_waitcnt vmcnt(0) 2028; GFX11-NEXT: s_setpc_b64 s[30:31] 2029; 2030; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm: 2031; GFX10-SCRATCH: ; %bb.0: 2032; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2033; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2034; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2035; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2036; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2037; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2038; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2039; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 2040; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2041; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2042; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2043; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4 2044; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12 2045; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2046; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2047; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2048; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2049; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2050; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2051; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2052; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2053; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2054; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2055; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2056; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2057; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2058 call amdgpu_gfx void @external_void_func_f16(half 4.0) 2059 ret void 2060} 2061 2062define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { 2063; GFX9-LABEL: test_call_external_void_func_f32_imm: 2064; GFX9: ; %bb.0: 2065; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2066; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2067; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2068; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2069; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2070; GFX9-NEXT: s_mov_b32 s33, s32 2071; GFX9-NEXT: s_addk_i32 s32, 0x400 2072; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2073; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 2074; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2075; GFX9-NEXT: s_getpc_b64 s[34:35] 2076; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 2077; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 2078; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2079; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2080; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2081; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2082; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2083; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2084; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2085; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2086; GFX9-NEXT: s_waitcnt 
vmcnt(0) 2087; GFX9-NEXT: s_setpc_b64 s[30:31] 2088; 2089; GFX10-LABEL: test_call_external_void_func_f32_imm: 2090; GFX10: ; %bb.0: 2091; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2092; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2093; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2094; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2095; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2096; GFX10-NEXT: s_mov_b32 exec_lo, s34 2097; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2098; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 2099; GFX10-NEXT: s_mov_b32 s33, s32 2100; GFX10-NEXT: s_addk_i32 s32, 0x200 2101; GFX10-NEXT: s_getpc_b64 s[34:35] 2102; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 2103; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 2104; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2105; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2106; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2107; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2108; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2109; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2110; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2111; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2112; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2113; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2114; GFX10-NEXT: s_mov_b32 exec_lo, s34 2115; GFX10-NEXT: s_waitcnt vmcnt(0) 2116; GFX10-NEXT: s_setpc_b64 s[30:31] 2117; 2118; GFX11-LABEL: test_call_external_void_func_f32_imm: 2119; GFX11: ; %bb.0: 2120; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2121; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2122; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2123; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2124; GFX11-NEXT: s_mov_b32 exec_lo, s0 2125; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2126; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 2127; GFX11-NEXT: s_mov_b32 s33, s32 2128; GFX11-NEXT: s_add_i32 s32, s32, 16 2129; GFX11-NEXT: s_getpc_b64 s[0:1] 2130; GFX11-NEXT: s_add_u32 s0, s0, 
external_void_func_f32@rel32@lo+4 2131; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12 2132; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2133; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2134; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2135; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2136; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2137; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2138; GFX11-NEXT: s_add_i32 s32, s32, -16 2139; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2140; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2141; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2142; GFX11-NEXT: s_mov_b32 exec_lo, s0 2143; GFX11-NEXT: s_waitcnt vmcnt(0) 2144; GFX11-NEXT: s_setpc_b64 s[30:31] 2145; 2146; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm: 2147; GFX10-SCRATCH: ; %bb.0: 2148; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2149; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2150; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2151; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2152; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2153; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2154; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2155; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 2156; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2157; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2158; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2159; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4 2160; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12 2161; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2162; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2163; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2164; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2165; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2166; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2167; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2168; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2169; 
GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2170; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2171; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2172; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2173; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2174 call amdgpu_gfx void @external_void_func_f32(float 4.0) 2175 ret void 2176} 2177 2178define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { 2179; GFX9-LABEL: test_call_external_void_func_v2f32_imm: 2180; GFX9: ; %bb.0: 2181; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2182; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2183; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2184; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2185; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2186; GFX9-NEXT: s_mov_b32 s33, s32 2187; GFX9-NEXT: s_addk_i32 s32, 0x400 2188; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2189; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 2190; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2191; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2192; GFX9-NEXT: s_getpc_b64 s[34:35] 2193; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 2194; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 2195; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2196; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2197; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2198; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2199; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2200; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2201; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2202; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2203; GFX9-NEXT: s_waitcnt vmcnt(0) 2204; GFX9-NEXT: s_setpc_b64 s[30:31] 2205; 2206; GFX10-LABEL: test_call_external_void_func_v2f32_imm: 2207; GFX10: ; %bb.0: 2208; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2209; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2210; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2211; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2212; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 2213; GFX10-NEXT: s_mov_b32 exec_lo, s34 2214; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2215; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 2216; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 2217; GFX10-NEXT: s_mov_b32 s33, s32 2218; GFX10-NEXT: s_addk_i32 s32, 0x200 2219; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2220; GFX10-NEXT: s_getpc_b64 s[34:35] 2221; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 2222; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 2223; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2224; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2225; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2226; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2227; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2228; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2229; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2230; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2231; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2232; GFX10-NEXT: s_mov_b32 exec_lo, s34 2233; GFX10-NEXT: s_waitcnt vmcnt(0) 2234; GFX10-NEXT: s_setpc_b64 s[30:31] 2235; 2236; GFX11-LABEL: test_call_external_void_func_v2f32_imm: 2237; GFX11: ; %bb.0: 2238; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2239; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2240; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2241; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2242; GFX11-NEXT: s_mov_b32 exec_lo, s0 2243; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2244; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 2245; GFX11-NEXT: s_mov_b32 s33, s32 2246; GFX11-NEXT: s_add_i32 s32, s32, 16 2247; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2248; GFX11-NEXT: s_getpc_b64 s[0:1] 2249; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4 2250; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 2251; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2252; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2253; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2254; GFX11-NEXT: 
v_readlane_b32 s31, v40, 1 2255; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2256; GFX11-NEXT: s_add_i32 s32, s32, -16 2257; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2258; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2259; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2260; GFX11-NEXT: s_mov_b32 exec_lo, s0 2261; GFX11-NEXT: s_waitcnt vmcnt(0) 2262; GFX11-NEXT: s_setpc_b64 s[30:31] 2263; 2264; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm: 2265; GFX10-SCRATCH: ; %bb.0: 2266; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2267; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2268; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2269; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2270; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2271; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2272; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2273; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 2274; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 2275; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2276; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2277; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2278; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2279; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4 2280; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 2281; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2282; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2283; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2284; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2285; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2286; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2287; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2288; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2289; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2290; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2291; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2292; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2293 call 
amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>) 2294 ret void 2295} 2296 2297define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { 2298; GFX9-LABEL: test_call_external_void_func_v3f32_imm: 2299; GFX9: ; %bb.0: 2300; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2301; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2302; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2303; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2304; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2305; GFX9-NEXT: s_mov_b32 s33, s32 2306; GFX9-NEXT: s_addk_i32 s32, 0x400 2307; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2308; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 2309; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2310; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 2311; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2312; GFX9-NEXT: s_getpc_b64 s[34:35] 2313; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 2314; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 2315; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2316; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2317; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2318; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2319; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2320; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2321; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2322; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2323; GFX9-NEXT: s_waitcnt vmcnt(0) 2324; GFX9-NEXT: s_setpc_b64 s[30:31] 2325; 2326; GFX10-LABEL: test_call_external_void_func_v3f32_imm: 2327; GFX10: ; %bb.0: 2328; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2329; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2330; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2331; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2332; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2333; GFX10-NEXT: s_mov_b32 exec_lo, s34 2334; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2335; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 2336; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 2337; 
GFX10-NEXT: v_mov_b32_e32 v2, 4.0 2338; GFX10-NEXT: s_mov_b32 s33, s32 2339; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2340; GFX10-NEXT: s_addk_i32 s32, 0x200 2341; GFX10-NEXT: s_getpc_b64 s[34:35] 2342; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 2343; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 2344; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2345; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2346; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2347; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2348; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2349; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2350; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2351; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2352; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2353; GFX10-NEXT: s_mov_b32 exec_lo, s34 2354; GFX10-NEXT: s_waitcnt vmcnt(0) 2355; GFX10-NEXT: s_setpc_b64 s[30:31] 2356; 2357; GFX11-LABEL: test_call_external_void_func_v3f32_imm: 2358; GFX11: ; %bb.0: 2359; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2360; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2361; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2362; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2363; GFX11-NEXT: s_mov_b32 exec_lo, s0 2364; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2365; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 2366; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 2367; GFX11-NEXT: s_mov_b32 s33, s32 2368; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2369; GFX11-NEXT: s_add_i32 s32, s32, 16 2370; GFX11-NEXT: s_getpc_b64 s[0:1] 2371; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4 2372; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 2373; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2374; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2375; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2376; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2377; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2378; GFX11-NEXT: s_add_i32 s32, s32, -16 2379; GFX11-NEXT: 
v_readlane_b32 s33, v40, 2 2380; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2381; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2382; GFX11-NEXT: s_mov_b32 exec_lo, s0 2383; GFX11-NEXT: s_waitcnt vmcnt(0) 2384; GFX11-NEXT: s_setpc_b64 s[30:31] 2385; 2386; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm: 2387; GFX10-SCRATCH: ; %bb.0: 2388; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2389; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2390; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2391; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2392; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2393; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2394; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2395; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 2396; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 2397; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 2398; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2399; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2400; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2401; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2402; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4 2403; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 2404; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2405; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2406; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2407; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2408; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2409; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2410; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2411; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2412; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2413; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2414; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2415; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2416 call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 
4.0>) 2417 ret void 2418} 2419 2420define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { 2421; GFX9-LABEL: test_call_external_void_func_v5f32_imm: 2422; GFX9: ; %bb.0: 2423; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2424; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2425; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2426; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2427; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2428; GFX9-NEXT: s_mov_b32 s33, s32 2429; GFX9-NEXT: s_addk_i32 s32, 0x400 2430; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2431; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 2432; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2433; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 2434; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 2435; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 2436; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2437; GFX9-NEXT: s_getpc_b64 s[34:35] 2438; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 2439; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 2440; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2441; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2442; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2443; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2444; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2445; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2446; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2447; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2448; GFX9-NEXT: s_waitcnt vmcnt(0) 2449; GFX9-NEXT: s_setpc_b64 s[30:31] 2450; 2451; GFX10-LABEL: test_call_external_void_func_v5f32_imm: 2452; GFX10: ; %bb.0: 2453; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2454; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2455; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2456; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2457; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2458; GFX10-NEXT: s_mov_b32 exec_lo, s34 2459; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2460; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 2461; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 2462; 
GFX10-NEXT: v_mov_b32_e32 v2, 4.0 2463; GFX10-NEXT: v_mov_b32_e32 v3, -1.0 2464; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2465; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 2466; GFX10-NEXT: s_mov_b32 s33, s32 2467; GFX10-NEXT: s_addk_i32 s32, 0x200 2468; GFX10-NEXT: s_getpc_b64 s[34:35] 2469; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 2470; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 2471; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2472; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2473; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2474; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2475; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2476; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2477; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2478; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2479; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2480; GFX10-NEXT: s_mov_b32 exec_lo, s34 2481; GFX10-NEXT: s_waitcnt vmcnt(0) 2482; GFX10-NEXT: s_setpc_b64 s[30:31] 2483; 2484; GFX11-LABEL: test_call_external_void_func_v5f32_imm: 2485; GFX11: ; %bb.0: 2486; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2487; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2488; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2489; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2490; GFX11-NEXT: s_mov_b32 exec_lo, s0 2491; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2492; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 2493; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 2494; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2495; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 2496; GFX11-NEXT: s_mov_b32 s33, s32 2497; GFX11-NEXT: s_add_i32 s32, s32, 16 2498; GFX11-NEXT: s_getpc_b64 s[0:1] 2499; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4 2500; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 2501; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2502; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2503; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 
2504; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2505; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2506; GFX11-NEXT: s_add_i32 s32, s32, -16 2507; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2508; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2509; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2510; GFX11-NEXT: s_mov_b32 exec_lo, s0 2511; GFX11-NEXT: s_waitcnt vmcnt(0) 2512; GFX11-NEXT: s_setpc_b64 s[30:31] 2513; 2514; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm: 2515; GFX10-SCRATCH: ; %bb.0: 2516; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2517; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2518; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2519; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2520; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2521; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2522; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2523; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 2524; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 2525; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 2526; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0 2527; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2528; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 2529; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2530; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2531; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2532; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4 2533; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 2534; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2535; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2536; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2537; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2538; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2539; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2540; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2541; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2542; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 2543; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2544; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2545; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2546 call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) 2547 ret void 2548} 2549 2550define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { 2551; GFX9-LABEL: test_call_external_void_func_f64_imm: 2552; GFX9: ; %bb.0: 2553; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2554; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2555; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2556; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2557; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2558; GFX9-NEXT: s_mov_b32 s33, s32 2559; GFX9-NEXT: s_addk_i32 s32, 0x400 2560; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2561; GFX9-NEXT: v_mov_b32_e32 v0, 0 2562; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 2563; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2564; GFX9-NEXT: s_getpc_b64 s[34:35] 2565; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 2566; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 2567; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2568; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2569; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2570; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2571; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2572; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2573; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2574; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2575; GFX9-NEXT: s_waitcnt vmcnt(0) 2576; GFX9-NEXT: s_setpc_b64 s[30:31] 2577; 2578; GFX10-LABEL: test_call_external_void_func_f64_imm: 2579; GFX10: ; %bb.0: 2580; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2581; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2582; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2583; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2584; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2585; GFX10-NEXT: 
s_mov_b32 exec_lo, s34 2586; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2587; GFX10-NEXT: v_mov_b32_e32 v0, 0 2588; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 2589; GFX10-NEXT: s_mov_b32 s33, s32 2590; GFX10-NEXT: s_addk_i32 s32, 0x200 2591; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2592; GFX10-NEXT: s_getpc_b64 s[34:35] 2593; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 2594; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 2595; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2596; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2597; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2598; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2599; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2600; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2601; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2602; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2603; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2604; GFX10-NEXT: s_mov_b32 exec_lo, s34 2605; GFX10-NEXT: s_waitcnt vmcnt(0) 2606; GFX10-NEXT: s_setpc_b64 s[30:31] 2607; 2608; GFX11-LABEL: test_call_external_void_func_f64_imm: 2609; GFX11: ; %bb.0: 2610; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2611; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2612; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2613; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2614; GFX11-NEXT: s_mov_b32 exec_lo, s0 2615; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2616; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 2617; GFX11-NEXT: s_mov_b32 s33, s32 2618; GFX11-NEXT: s_add_i32 s32, s32, 16 2619; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2620; GFX11-NEXT: s_getpc_b64 s[0:1] 2621; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4 2622; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 2623; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2624; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2625; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2626; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2627; GFX11-NEXT: 
v_readlane_b32 s30, v40, 0 2628; GFX11-NEXT: s_add_i32 s32, s32, -16 2629; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2630; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2631; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2632; GFX11-NEXT: s_mov_b32 exec_lo, s0 2633; GFX11-NEXT: s_waitcnt vmcnt(0) 2634; GFX11-NEXT: s_setpc_b64 s[30:31] 2635; 2636; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm: 2637; GFX10-SCRATCH: ; %bb.0: 2638; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2639; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2640; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2641; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2642; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2643; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2644; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2645; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 2646; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 2647; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2648; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2649; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2650; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2651; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4 2652; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 2653; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2654; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2655; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2656; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2657; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2658; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2659; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2660; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2661; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2662; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2663; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2664; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2665 call amdgpu_gfx void @external_void_func_f64(double 
4.0) 2666 ret void 2667} 2668 2669define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { 2670; GFX9-LABEL: test_call_external_void_func_v2f64_imm: 2671; GFX9: ; %bb.0: 2672; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2673; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2674; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2675; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2676; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2677; GFX9-NEXT: s_mov_b32 s33, s32 2678; GFX9-NEXT: s_addk_i32 s32, 0x400 2679; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2680; GFX9-NEXT: v_mov_b32_e32 v0, 0 2681; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2682; GFX9-NEXT: v_mov_b32_e32 v2, 0 2683; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 2684; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2685; GFX9-NEXT: s_getpc_b64 s[34:35] 2686; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 2687; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 2688; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2689; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2690; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2691; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2692; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2693; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2694; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2695; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2696; GFX9-NEXT: s_waitcnt vmcnt(0) 2697; GFX9-NEXT: s_setpc_b64 s[30:31] 2698; 2699; GFX10-LABEL: test_call_external_void_func_v2f64_imm: 2700; GFX10: ; %bb.0: 2701; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2702; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2703; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2704; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2705; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2706; GFX10-NEXT: s_mov_b32 exec_lo, s34 2707; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2708; GFX10-NEXT: v_mov_b32_e32 v0, 0 2709; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 2710; GFX10-NEXT: v_mov_b32_e32 v2, 0 2711; 
GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 2712; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2713; GFX10-NEXT: s_mov_b32 s33, s32 2714; GFX10-NEXT: s_addk_i32 s32, 0x200 2715; GFX10-NEXT: s_getpc_b64 s[34:35] 2716; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 2717; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 2718; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2719; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2720; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2721; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2722; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2723; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2724; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2725; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2726; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2727; GFX10-NEXT: s_mov_b32 exec_lo, s34 2728; GFX10-NEXT: s_waitcnt vmcnt(0) 2729; GFX10-NEXT: s_setpc_b64 s[30:31] 2730; 2731; GFX11-LABEL: test_call_external_void_func_v2f64_imm: 2732; GFX11: ; %bb.0: 2733; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2734; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2735; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2736; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2737; GFX11-NEXT: s_mov_b32 exec_lo, s0 2738; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2739; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 2740; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 2741; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2742; GFX11-NEXT: s_mov_b32 s33, s32 2743; GFX11-NEXT: s_add_i32 s32, s32, 16 2744; GFX11-NEXT: s_getpc_b64 s[0:1] 2745; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4 2746; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 2747; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2748; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2749; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2750; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2751; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2752; GFX11-NEXT: 
s_add_i32 s32, s32, -16 2753; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2754; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2755; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2756; GFX11-NEXT: s_mov_b32 exec_lo, s0 2757; GFX11-NEXT: s_waitcnt vmcnt(0) 2758; GFX11-NEXT: s_setpc_b64 s[30:31] 2759; 2760; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm: 2761; GFX10-SCRATCH: ; %bb.0: 2762; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2763; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2764; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2765; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2766; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2767; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2768; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2769; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 2770; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 2771; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 2772; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 2773; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2774; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2775; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2776; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2777; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4 2778; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 2779; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2780; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2781; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2782; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 2783; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2784; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2785; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2786; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2787; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2788; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2789; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2790; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 
2791 call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>) 2792 ret void 2793} 2794 2795define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { 2796; GFX9-LABEL: test_call_external_void_func_v3f64_imm: 2797; GFX9: ; %bb.0: 2798; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2799; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2800; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2801; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2802; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2803; GFX9-NEXT: s_mov_b32 s33, s32 2804; GFX9-NEXT: s_addk_i32 s32, 0x400 2805; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2806; GFX9-NEXT: v_mov_b32_e32 v0, 0 2807; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2808; GFX9-NEXT: v_mov_b32_e32 v2, 0 2809; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 2810; GFX9-NEXT: v_mov_b32_e32 v4, 0 2811; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 2812; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2813; GFX9-NEXT: s_getpc_b64 s[34:35] 2814; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 2815; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 2816; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2817; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2818; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2819; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2820; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2821; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2822; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2823; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2824; GFX9-NEXT: s_waitcnt vmcnt(0) 2825; GFX9-NEXT: s_setpc_b64 s[30:31] 2826; 2827; GFX10-LABEL: test_call_external_void_func_v3f64_imm: 2828; GFX10: ; %bb.0: 2829; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2830; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2831; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2832; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2833; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2834; GFX10-NEXT: s_mov_b32 exec_lo, s34 
2835; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2836; GFX10-NEXT: v_mov_b32_e32 v0, 0 2837; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 2838; GFX10-NEXT: v_mov_b32_e32 v2, 0 2839; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 2840; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2841; GFX10-NEXT: v_mov_b32_e32 v4, 0 2842; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 2843; GFX10-NEXT: s_mov_b32 s33, s32 2844; GFX10-NEXT: s_addk_i32 s32, 0x200 2845; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2846; GFX10-NEXT: s_getpc_b64 s[34:35] 2847; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 2848; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 2849; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2850; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2851; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2852; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2853; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2854; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2855; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2856; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2857; GFX10-NEXT: s_mov_b32 exec_lo, s34 2858; GFX10-NEXT: s_waitcnt vmcnt(0) 2859; GFX10-NEXT: s_setpc_b64 s[30:31] 2860; 2861; GFX11-LABEL: test_call_external_void_func_v3f64_imm: 2862; GFX11: ; %bb.0: 2863; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2865; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2866; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2867; GFX11-NEXT: s_mov_b32 exec_lo, s0 2868; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2869; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 2870; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 2871; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2872; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 2873; GFX11-NEXT: s_mov_b32 s33, s32 2874; GFX11-NEXT: s_add_i32 s32, s32, 16 2875; GFX11-NEXT: v_writelane_b32 v40, s31, 1 2876; GFX11-NEXT: s_getpc_b64 s[0:1] 2877; GFX11-NEXT: s_add_u32 s0, 
s0, external_void_func_v3f64@rel32@lo+4 2878; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 2879; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2880; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 2881; GFX11-NEXT: v_readlane_b32 s31, v40, 1 2882; GFX11-NEXT: v_readlane_b32 s30, v40, 0 2883; GFX11-NEXT: s_add_i32 s32, s32, -16 2884; GFX11-NEXT: v_readlane_b32 s33, v40, 2 2885; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2886; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 2887; GFX11-NEXT: s_mov_b32 exec_lo, s0 2888; GFX11-NEXT: s_waitcnt vmcnt(0) 2889; GFX11-NEXT: s_setpc_b64 s[30:31] 2890; 2891; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm: 2892; GFX10-SCRATCH: ; %bb.0: 2893; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2894; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 2895; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2896; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 2897; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2898; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2899; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 2900; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 2901; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 2902; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 2903; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 2904; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 2905; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 2906; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 2907; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 2908; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 2909; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 2910; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 2911; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 2912; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 2913; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 2914; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 2915; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 
2916; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 2917; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 2918; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 2919; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 2920; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 2921; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 2922; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 2923; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 2924 call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>) 2925 ret void 2926} 2927 2928define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { 2929; GFX9-LABEL: test_call_external_void_func_v2i16: 2930; GFX9: ; %bb.0: 2931; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2932; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2933; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2934; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2935; GFX9-NEXT: global_load_dword v0, v[0:1], off 2936; GFX9-NEXT: v_writelane_b32 v40, s33, 2 2937; GFX9-NEXT: s_mov_b32 s33, s32 2938; GFX9-NEXT: s_addk_i32 s32, 0x400 2939; GFX9-NEXT: v_writelane_b32 v40, s30, 0 2940; GFX9-NEXT: v_writelane_b32 v40, s31, 1 2941; GFX9-NEXT: s_getpc_b64 s[34:35] 2942; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 2943; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 2944; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 2945; GFX9-NEXT: v_readlane_b32 s31, v40, 1 2946; GFX9-NEXT: v_readlane_b32 s30, v40, 0 2947; GFX9-NEXT: s_addk_i32 s32, 0xfc00 2948; GFX9-NEXT: v_readlane_b32 s33, v40, 2 2949; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 2950; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2951; GFX9-NEXT: s_mov_b64 exec, s[34:35] 2952; GFX9-NEXT: s_waitcnt vmcnt(0) 2953; GFX9-NEXT: s_setpc_b64 s[30:31] 2954; 2955; GFX10-LABEL: test_call_external_void_func_v2i16: 2956; GFX10: ; %bb.0: 2957; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2958; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 2959; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2960; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 2961; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2962; GFX10-NEXT: s_mov_b32 exec_lo, s34 2963; GFX10-NEXT: global_load_dword v0, v[0:1], off 2964; GFX10-NEXT: v_writelane_b32 v40, s33, 2 2965; GFX10-NEXT: s_mov_b32 s33, s32 2966; GFX10-NEXT: s_addk_i32 s32, 0x200 2967; GFX10-NEXT: s_getpc_b64 s[34:35] 2968; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 2969; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 2970; GFX10-NEXT: v_writelane_b32 v40, s30, 0 2971; GFX10-NEXT: v_writelane_b32 v40, s31, 1 2972; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 2973; GFX10-NEXT: v_readlane_b32 s31, v40, 1 2974; GFX10-NEXT: v_readlane_b32 s30, v40, 0 2975; GFX10-NEXT: s_addk_i32 s32, 0xfe00 2976; GFX10-NEXT: v_readlane_b32 s33, v40, 2 2977; GFX10-NEXT: s_or_saveexec_b32 s34, -1 2978; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2979; GFX10-NEXT: s_waitcnt_depctr 0xffe3 2980; GFX10-NEXT: s_mov_b32 exec_lo, s34 2981; GFX10-NEXT: s_waitcnt vmcnt(0) 2982; GFX10-NEXT: s_setpc_b64 s[30:31] 2983; 2984; GFX11-LABEL: test_call_external_void_func_v2i16: 2985; GFX11: ; %bb.0: 2986; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2987; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2988; GFX11-NEXT: s_or_saveexec_b32 s0, -1 2989; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 2990; GFX11-NEXT: s_mov_b32 exec_lo, s0 2991; GFX11-NEXT: global_load_b32 v0, v[0:1], off 2992; GFX11-NEXT: v_writelane_b32 v40, s33, 2 2993; GFX11-NEXT: s_mov_b32 s33, s32 2994; GFX11-NEXT: s_add_i32 s32, s32, 16 2995; GFX11-NEXT: s_getpc_b64 s[0:1] 2996; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4 2997; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12 2998; GFX11-NEXT: v_writelane_b32 v40, s30, 0 2999; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3000; 
GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3001; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3002; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3003; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3004; GFX11-NEXT: s_add_i32 s32, s32, -16 3005; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3006; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3007; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3008; GFX11-NEXT: s_mov_b32 exec_lo, s0 3009; GFX11-NEXT: s_waitcnt vmcnt(0) 3010; GFX11-NEXT: s_setpc_b64 s[30:31] 3011; 3012; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16: 3013; GFX10-SCRATCH: ; %bb.0: 3014; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3015; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3016; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3017; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3018; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3019; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3020; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off 3021; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3022; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3023; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3024; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3025; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4 3026; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12 3027; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3028; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3029; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3030; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3031; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3032; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3033; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3034; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3035; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3036; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3037; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3038; GFX10-SCRATCH-NEXT: 
s_waitcnt vmcnt(0) 3039; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3040 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef 3041 call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val) 3042 ret void 3043} 3044 3045define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { 3046; GFX9-LABEL: test_call_external_void_func_v3i16: 3047; GFX9: ; %bb.0: 3048; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3049; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3050; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3051; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3052; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3053; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3054; GFX9-NEXT: s_mov_b32 s33, s32 3055; GFX9-NEXT: s_addk_i32 s32, 0x400 3056; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3057; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3058; GFX9-NEXT: s_getpc_b64 s[34:35] 3059; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 3060; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 3061; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3062; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3063; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3064; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3065; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3066; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3067; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3068; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3069; GFX9-NEXT: s_waitcnt vmcnt(0) 3070; GFX9-NEXT: s_setpc_b64 s[30:31] 3071; 3072; GFX10-LABEL: test_call_external_void_func_v3i16: 3073; GFX10: ; %bb.0: 3074; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3075; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3076; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3077; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3078; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3079; GFX10-NEXT: s_mov_b32 exec_lo, s34 3080; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3081; GFX10-NEXT: v_writelane_b32 v40, 
s33, 2 3082; GFX10-NEXT: s_mov_b32 s33, s32 3083; GFX10-NEXT: s_addk_i32 s32, 0x200 3084; GFX10-NEXT: s_getpc_b64 s[34:35] 3085; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 3086; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 3087; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3088; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3089; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3090; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3091; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3092; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3093; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3094; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3095; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3096; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3097; GFX10-NEXT: s_mov_b32 exec_lo, s34 3098; GFX10-NEXT: s_waitcnt vmcnt(0) 3099; GFX10-NEXT: s_setpc_b64 s[30:31] 3100; 3101; GFX11-LABEL: test_call_external_void_func_v3i16: 3102; GFX11: ; %bb.0: 3103; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3104; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3105; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3106; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3107; GFX11-NEXT: s_mov_b32 exec_lo, s0 3108; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 3109; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3110; GFX11-NEXT: s_mov_b32 s33, s32 3111; GFX11-NEXT: s_add_i32 s32, s32, 16 3112; GFX11-NEXT: s_getpc_b64 s[0:1] 3113; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 3114; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 3115; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3116; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3117; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3118; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3119; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3120; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3121; GFX11-NEXT: s_add_i32 s32, s32, -16 3122; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3123; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3124; 
GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3125; GFX11-NEXT: s_mov_b32 exec_lo, s0 3126; GFX11-NEXT: s_waitcnt vmcnt(0) 3127; GFX11-NEXT: s_setpc_b64 s[30:31] 3128; 3129; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16: 3130; GFX10-SCRATCH: ; %bb.0: 3131; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3132; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3133; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3134; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3135; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3136; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3137; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3138; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3139; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3140; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3141; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3142; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 3143; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 3144; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3145; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3146; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3147; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3148; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3149; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3150; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3151; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3152; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3153; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3154; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3155; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3156; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3157 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef 3158 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val) 3159 ret void 3160} 3161 3162define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { 3163; GFX9-LABEL: 
test_call_external_void_func_v3f16: 3164; GFX9: ; %bb.0: 3165; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3166; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3167; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3168; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3169; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3170; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3171; GFX9-NEXT: s_mov_b32 s33, s32 3172; GFX9-NEXT: s_addk_i32 s32, 0x400 3173; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3174; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3175; GFX9-NEXT: s_getpc_b64 s[34:35] 3176; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 3177; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 3178; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3179; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3180; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3181; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3182; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3183; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3184; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3185; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3186; GFX9-NEXT: s_waitcnt vmcnt(0) 3187; GFX9-NEXT: s_setpc_b64 s[30:31] 3188; 3189; GFX10-LABEL: test_call_external_void_func_v3f16: 3190; GFX10: ; %bb.0: 3191; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3192; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3193; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3194; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3195; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3196; GFX10-NEXT: s_mov_b32 exec_lo, s34 3197; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3198; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3199; GFX10-NEXT: s_mov_b32 s33, s32 3200; GFX10-NEXT: s_addk_i32 s32, 0x200 3201; GFX10-NEXT: s_getpc_b64 s[34:35] 3202; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 3203; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 3204; GFX10-NEXT: v_writelane_b32 
v40, s30, 0 3205; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3206; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3207; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3208; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3209; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3210; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3211; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3212; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3213; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3214; GFX10-NEXT: s_mov_b32 exec_lo, s34 3215; GFX10-NEXT: s_waitcnt vmcnt(0) 3216; GFX10-NEXT: s_setpc_b64 s[30:31] 3217; 3218; GFX11-LABEL: test_call_external_void_func_v3f16: 3219; GFX11: ; %bb.0: 3220; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3221; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3222; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3223; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3224; GFX11-NEXT: s_mov_b32 exec_lo, s0 3225; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 3226; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3227; GFX11-NEXT: s_mov_b32 s33, s32 3228; GFX11-NEXT: s_add_i32 s32, s32, 16 3229; GFX11-NEXT: s_getpc_b64 s[0:1] 3230; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 3231; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 3232; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3233; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3234; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3235; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3236; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3237; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3238; GFX11-NEXT: s_add_i32 s32, s32, -16 3239; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3240; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3241; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3242; GFX11-NEXT: s_mov_b32 exec_lo, s0 3243; GFX11-NEXT: s_waitcnt vmcnt(0) 3244; GFX11-NEXT: s_setpc_b64 s[30:31] 3245; 3246; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16: 3247; GFX10-SCRATCH: ; %bb.0: 3248; GFX10-SCRATCH-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3249; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3250; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3251; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3252; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3253; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3254; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3255; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3256; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3257; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3258; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3259; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 3260; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 3261; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3262; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3263; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3264; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3265; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3266; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3267; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3268; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3269; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3270; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3271; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3272; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3273; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3274 %val = load <3 x half>, <3 x half> addrspace(1)* undef 3275 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val) 3276 ret void 3277} 3278 3279define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { 3280; GFX9-LABEL: test_call_external_void_func_v3i16_imm: 3281; GFX9: ; %bb.0: 3282; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3283; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3284; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3285; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3286; GFX9-NEXT: 
v_writelane_b32 v40, s33, 2 3287; GFX9-NEXT: s_mov_b32 s33, s32 3288; GFX9-NEXT: s_addk_i32 s32, 0x400 3289; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3290; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 3291; GFX9-NEXT: v_mov_b32_e32 v1, 3 3292; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3293; GFX9-NEXT: s_getpc_b64 s[34:35] 3294; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 3295; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 3296; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3297; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3298; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3299; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3300; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3301; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3302; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3303; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3304; GFX9-NEXT: s_waitcnt vmcnt(0) 3305; GFX9-NEXT: s_setpc_b64 s[30:31] 3306; 3307; GFX10-LABEL: test_call_external_void_func_v3i16_imm: 3308; GFX10: ; %bb.0: 3309; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3310; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3311; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3312; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3313; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3314; GFX10-NEXT: s_mov_b32 exec_lo, s34 3315; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3316; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 3317; GFX10-NEXT: v_mov_b32_e32 v1, 3 3318; GFX10-NEXT: s_mov_b32 s33, s32 3319; GFX10-NEXT: s_addk_i32 s32, 0x200 3320; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3321; GFX10-NEXT: s_getpc_b64 s[34:35] 3322; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 3323; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 3324; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3325; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3326; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3327; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3328; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3329; 
GFX10-NEXT: v_readlane_b32 s33, v40, 2 3330; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3331; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3332; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3333; GFX10-NEXT: s_mov_b32 exec_lo, s34 3334; GFX10-NEXT: s_waitcnt vmcnt(0) 3335; GFX10-NEXT: s_setpc_b64 s[30:31] 3336; 3337; GFX11-LABEL: test_call_external_void_func_v3i16_imm: 3338; GFX11: ; %bb.0: 3339; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3340; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3341; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3342; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3343; GFX11-NEXT: s_mov_b32 exec_lo, s0 3344; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3345; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 3346; GFX11-NEXT: s_mov_b32 s33, s32 3347; GFX11-NEXT: s_add_i32 s32, s32, 16 3348; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3349; GFX11-NEXT: s_getpc_b64 s[0:1] 3350; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 3351; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 3352; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3353; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3354; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3355; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3356; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3357; GFX11-NEXT: s_add_i32 s32, s32, -16 3358; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3359; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3360; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3361; GFX11-NEXT: s_mov_b32 exec_lo, s0 3362; GFX11-NEXT: s_waitcnt vmcnt(0) 3363; GFX11-NEXT: s_setpc_b64 s[30:31] 3364; 3365; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm: 3366; GFX10-SCRATCH: ; %bb.0: 3367; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3368; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3369; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3370; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 
3371; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3372; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3373; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3374; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 3375; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 3376; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3377; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3378; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3379; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3380; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 3381; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 3382; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3383; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3384; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3385; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3386; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3387; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3388; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3389; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3390; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3391; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3392; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3393; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3394 call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) 3395 ret void 3396} 3397 3398define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { 3399; GFX9-LABEL: test_call_external_void_func_v3f16_imm: 3400; GFX9: ; %bb.0: 3401; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3402; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3403; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3404; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3405; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3406; GFX9-NEXT: s_mov_b32 s33, s32 3407; GFX9-NEXT: s_addk_i32 s32, 0x400 3408; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3409; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 3410; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 3411; 
GFX9-NEXT: v_writelane_b32 v40, s31, 1 3412; GFX9-NEXT: s_getpc_b64 s[34:35] 3413; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 3414; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 3415; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3416; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3417; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3418; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3419; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3420; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3421; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3422; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3423; GFX9-NEXT: s_waitcnt vmcnt(0) 3424; GFX9-NEXT: s_setpc_b64 s[30:31] 3425; 3426; GFX10-LABEL: test_call_external_void_func_v3f16_imm: 3427; GFX10: ; %bb.0: 3428; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3429; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3430; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3431; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3432; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3433; GFX10-NEXT: s_mov_b32 exec_lo, s34 3434; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3435; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 3436; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 3437; GFX10-NEXT: s_mov_b32 s33, s32 3438; GFX10-NEXT: s_addk_i32 s32, 0x200 3439; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3440; GFX10-NEXT: s_getpc_b64 s[34:35] 3441; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 3442; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 3443; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3444; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3445; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3446; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3447; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3448; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3449; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3450; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3451; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3452; GFX10-NEXT: 
s_mov_b32 exec_lo, s34 3453; GFX10-NEXT: s_waitcnt vmcnt(0) 3454; GFX10-NEXT: s_setpc_b64 s[30:31] 3455; 3456; GFX11-LABEL: test_call_external_void_func_v3f16_imm: 3457; GFX11: ; %bb.0: 3458; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3459; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3460; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3461; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3462; GFX11-NEXT: s_mov_b32 exec_lo, s0 3463; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3464; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 3465; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 3466; GFX11-NEXT: s_mov_b32 s33, s32 3467; GFX11-NEXT: s_add_i32 s32, s32, 16 3468; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3469; GFX11-NEXT: s_getpc_b64 s[0:1] 3470; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 3471; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 3472; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3473; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3474; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3475; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3476; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3477; GFX11-NEXT: s_add_i32 s32, s32, -16 3478; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3479; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3480; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3481; GFX11-NEXT: s_mov_b32 exec_lo, s0 3482; GFX11-NEXT: s_waitcnt vmcnt(0) 3483; GFX11-NEXT: s_setpc_b64 s[30:31] 3484; 3485; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm: 3486; GFX10-SCRATCH: ; %bb.0: 3487; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3488; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3489; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3490; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3491; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3492; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3493; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3494; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 
3495; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 3496; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3497; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3498; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3499; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3500; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 3501; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 3502; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3503; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3504; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3505; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3506; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3507; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3508; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3509; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3510; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3511; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3512; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3513; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3514 call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>) 3515 ret void 3516} 3517 3518define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { 3519; GFX9-LABEL: test_call_external_void_func_v4i16: 3520; GFX9: ; %bb.0: 3521; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3522; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3523; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3524; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3525; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3526; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3527; GFX9-NEXT: s_mov_b32 s33, s32 3528; GFX9-NEXT: s_addk_i32 s32, 0x400 3529; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3530; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3531; GFX9-NEXT: s_getpc_b64 s[34:35] 3532; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 3533; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 3534; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3535; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3536; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3537; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3538; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3539; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3540; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3541; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3542; GFX9-NEXT: s_waitcnt vmcnt(0) 3543; GFX9-NEXT: s_setpc_b64 s[30:31] 3544; 3545; GFX10-LABEL: test_call_external_void_func_v4i16: 3546; GFX10: ; %bb.0: 3547; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3548; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3549; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3550; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3551; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3552; GFX10-NEXT: s_mov_b32 exec_lo, s34 3553; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3554; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3555; GFX10-NEXT: s_mov_b32 s33, s32 3556; GFX10-NEXT: s_addk_i32 s32, 0x200 3557; GFX10-NEXT: s_getpc_b64 s[34:35] 3558; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 3559; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 3560; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3561; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3562; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3563; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3564; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3565; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3566; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3567; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3568; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3569; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3570; GFX10-NEXT: s_mov_b32 exec_lo, s34 3571; GFX10-NEXT: s_waitcnt vmcnt(0) 3572; GFX10-NEXT: s_setpc_b64 s[30:31] 3573; 3574; GFX11-LABEL: test_call_external_void_func_v4i16: 3575; GFX11: ; %bb.0: 3576; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3577; GFX11-NEXT: s_waitcnt_vscnt 
null, 0x0 3578; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3579; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3580; GFX11-NEXT: s_mov_b32 exec_lo, s0 3581; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 3582; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3583; GFX11-NEXT: s_mov_b32 s33, s32 3584; GFX11-NEXT: s_add_i32 s32, s32, 16 3585; GFX11-NEXT: s_getpc_b64 s[0:1] 3586; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 3587; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 3588; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3589; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3590; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3591; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3592; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3593; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3594; GFX11-NEXT: s_add_i32 s32, s32, -16 3595; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3596; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3597; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3598; GFX11-NEXT: s_mov_b32 exec_lo, s0 3599; GFX11-NEXT: s_waitcnt vmcnt(0) 3600; GFX11-NEXT: s_setpc_b64 s[30:31] 3601; 3602; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16: 3603; GFX10-SCRATCH: ; %bb.0: 3604; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3605; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3606; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3607; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3608; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3609; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3610; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3611; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3612; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3613; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3614; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3615; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 3616; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 3617; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3618; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3619; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3620; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3621; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3622; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3623; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3624; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3625; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3626; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3627; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3628; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3629; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3630 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef 3631 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val) 3632 ret void 3633} 3634 3635define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { 3636; GFX9-LABEL: test_call_external_void_func_v4i16_imm: 3637; GFX9: ; %bb.0: 3638; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3639; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3640; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3641; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3642; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3643; GFX9-NEXT: s_mov_b32 s33, s32 3644; GFX9-NEXT: s_addk_i32 s32, 0x400 3645; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3646; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 3647; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 3648; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3649; GFX9-NEXT: s_getpc_b64 s[34:35] 3650; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 3651; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 3652; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3653; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3654; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3655; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3656; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3657; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3658; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3659; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3660; GFX9-NEXT: s_waitcnt vmcnt(0) 3661; GFX9-NEXT: s_setpc_b64 s[30:31] 3662; 3663; GFX10-LABEL: test_call_external_void_func_v4i16_imm: 3664; GFX10: ; %bb.0: 3665; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3666; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3667; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3668; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3669; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3670; GFX10-NEXT: s_mov_b32 exec_lo, s34 3671; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3672; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 3673; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 3674; GFX10-NEXT: s_mov_b32 s33, s32 3675; GFX10-NEXT: s_addk_i32 s32, 0x200 3676; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3677; GFX10-NEXT: s_getpc_b64 s[34:35] 3678; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 3679; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 3680; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3681; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3682; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3683; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3684; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3685; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3686; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3687; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3688; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3689; GFX10-NEXT: s_mov_b32 exec_lo, s34 3690; GFX10-NEXT: s_waitcnt vmcnt(0) 3691; GFX10-NEXT: s_setpc_b64 s[30:31] 3692; 3693; GFX11-LABEL: test_call_external_void_func_v4i16_imm: 3694; GFX11: ; %bb.0: 3695; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3696; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3697; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3698; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3699; GFX11-NEXT: s_mov_b32 exec_lo, s0 3700; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3701; GFX11-NEXT: v_mov_b32_e32 
v0, 0x20001 3702; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 3703; GFX11-NEXT: s_mov_b32 s33, s32 3704; GFX11-NEXT: s_add_i32 s32, s32, 16 3705; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3706; GFX11-NEXT: s_getpc_b64 s[0:1] 3707; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 3708; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 3709; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3710; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3711; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3712; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3713; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3714; GFX11-NEXT: s_add_i32 s32, s32, -16 3715; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3716; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3717; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3718; GFX11-NEXT: s_mov_b32 exec_lo, s0 3719; GFX11-NEXT: s_waitcnt vmcnt(0) 3720; GFX11-NEXT: s_setpc_b64 s[30:31] 3721; 3722; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm: 3723; GFX10-SCRATCH: ; %bb.0: 3724; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3725; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3726; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3727; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3728; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3729; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3730; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3731; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 3732; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 3733; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3734; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3735; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3736; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3737; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 3738; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 3739; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3740; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3741; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3742; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3743; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3744; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3745; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3746; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3747; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3748; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3749; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3750; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3751 call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>) 3752 ret void 3753} 3754 3755define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { 3756; GFX9-LABEL: test_call_external_void_func_v2f16: 3757; GFX9: ; %bb.0: 3758; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3759; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3760; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3761; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3762; GFX9-NEXT: global_load_dword v0, v[0:1], off 3763; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3764; GFX9-NEXT: s_mov_b32 s33, s32 3765; GFX9-NEXT: s_addk_i32 s32, 0x400 3766; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3767; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3768; GFX9-NEXT: s_getpc_b64 s[34:35] 3769; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 3770; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 3771; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3772; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3773; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3774; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3775; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3776; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3777; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3778; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3779; GFX9-NEXT: s_waitcnt vmcnt(0) 3780; GFX9-NEXT: s_setpc_b64 s[30:31] 3781; 3782; GFX10-LABEL: test_call_external_void_func_v2f16: 3783; 
GFX10: ; %bb.0: 3784; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3785; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3786; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3787; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3788; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3789; GFX10-NEXT: s_mov_b32 exec_lo, s34 3790; GFX10-NEXT: global_load_dword v0, v[0:1], off 3791; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3792; GFX10-NEXT: s_mov_b32 s33, s32 3793; GFX10-NEXT: s_addk_i32 s32, 0x200 3794; GFX10-NEXT: s_getpc_b64 s[34:35] 3795; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 3796; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 3797; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3798; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3799; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3800; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3801; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3802; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3803; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3804; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3805; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3806; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3807; GFX10-NEXT: s_mov_b32 exec_lo, s34 3808; GFX10-NEXT: s_waitcnt vmcnt(0) 3809; GFX10-NEXT: s_setpc_b64 s[30:31] 3810; 3811; GFX11-LABEL: test_call_external_void_func_v2f16: 3812; GFX11: ; %bb.0: 3813; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3814; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3815; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3816; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3817; GFX11-NEXT: s_mov_b32 exec_lo, s0 3818; GFX11-NEXT: global_load_b32 v0, v[0:1], off 3819; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3820; GFX11-NEXT: s_mov_b32 s33, s32 3821; GFX11-NEXT: s_add_i32 s32, s32, 16 3822; GFX11-NEXT: s_getpc_b64 s[0:1] 3823; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4 3824; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12 3825; 
GFX11-NEXT: v_writelane_b32 v40, s30, 0 3826; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3827; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3828; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3829; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3830; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3831; GFX11-NEXT: s_add_i32 s32, s32, -16 3832; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3833; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3834; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3835; GFX11-NEXT: s_mov_b32 exec_lo, s0 3836; GFX11-NEXT: s_waitcnt vmcnt(0) 3837; GFX11-NEXT: s_setpc_b64 s[30:31] 3838; 3839; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16: 3840; GFX10-SCRATCH: ; %bb.0: 3841; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3842; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3843; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3844; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3845; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3846; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3847; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off 3848; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3849; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3850; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3851; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3852; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4 3853; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12 3854; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3855; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3856; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3857; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3858; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3859; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3860; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3861; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3862; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3863; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 3864; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3865; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3866; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3867 %val = load <2 x half>, <2 x half> addrspace(1)* undef 3868 call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val) 3869 ret void 3870} 3871 3872define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { 3873; GFX9-LABEL: test_call_external_void_func_v2i32: 3874; GFX9: ; %bb.0: 3875; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3876; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3877; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3878; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3879; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3880; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3881; GFX9-NEXT: s_mov_b32 s33, s32 3882; GFX9-NEXT: s_addk_i32 s32, 0x400 3883; GFX9-NEXT: v_writelane_b32 v40, s30, 0 3884; GFX9-NEXT: v_writelane_b32 v40, s31, 1 3885; GFX9-NEXT: s_getpc_b64 s[34:35] 3886; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 3887; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 3888; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 3889; GFX9-NEXT: v_readlane_b32 s31, v40, 1 3890; GFX9-NEXT: v_readlane_b32 s30, v40, 0 3891; GFX9-NEXT: s_addk_i32 s32, 0xfc00 3892; GFX9-NEXT: v_readlane_b32 s33, v40, 2 3893; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3894; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3895; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3896; GFX9-NEXT: s_waitcnt vmcnt(0) 3897; GFX9-NEXT: s_setpc_b64 s[30:31] 3898; 3899; GFX10-LABEL: test_call_external_void_func_v2i32: 3900; GFX10: ; %bb.0: 3901; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3902; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3903; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3904; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3905; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3906; GFX10-NEXT: s_mov_b32 exec_lo, 
s34 3907; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3908; GFX10-NEXT: v_writelane_b32 v40, s33, 2 3909; GFX10-NEXT: s_mov_b32 s33, s32 3910; GFX10-NEXT: s_addk_i32 s32, 0x200 3911; GFX10-NEXT: s_getpc_b64 s[34:35] 3912; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 3913; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 3914; GFX10-NEXT: v_writelane_b32 v40, s30, 0 3915; GFX10-NEXT: v_writelane_b32 v40, s31, 1 3916; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 3917; GFX10-NEXT: v_readlane_b32 s31, v40, 1 3918; GFX10-NEXT: v_readlane_b32 s30, v40, 0 3919; GFX10-NEXT: s_addk_i32 s32, 0xfe00 3920; GFX10-NEXT: v_readlane_b32 s33, v40, 2 3921; GFX10-NEXT: s_or_saveexec_b32 s34, -1 3922; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 3923; GFX10-NEXT: s_waitcnt_depctr 0xffe3 3924; GFX10-NEXT: s_mov_b32 exec_lo, s34 3925; GFX10-NEXT: s_waitcnt vmcnt(0) 3926; GFX10-NEXT: s_setpc_b64 s[30:31] 3927; 3928; GFX11-LABEL: test_call_external_void_func_v2i32: 3929; GFX11: ; %bb.0: 3930; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3931; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3932; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3933; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 3934; GFX11-NEXT: s_mov_b32 exec_lo, s0 3935; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off 3936; GFX11-NEXT: v_writelane_b32 v40, s33, 2 3937; GFX11-NEXT: s_mov_b32 s33, s32 3938; GFX11-NEXT: s_add_i32 s32, s32, 16 3939; GFX11-NEXT: s_getpc_b64 s[0:1] 3940; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 3941; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 3942; GFX11-NEXT: v_writelane_b32 v40, s30, 0 3943; GFX11-NEXT: v_writelane_b32 v40, s31, 1 3944; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 3945; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3946; GFX11-NEXT: v_readlane_b32 s31, v40, 1 3947; GFX11-NEXT: v_readlane_b32 s30, v40, 0 3948; GFX11-NEXT: s_add_i32 s32, s32, -16 
3949; GFX11-NEXT: v_readlane_b32 s33, v40, 2 3950; GFX11-NEXT: s_or_saveexec_b32 s0, -1 3951; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 3952; GFX11-NEXT: s_mov_b32 exec_lo, s0 3953; GFX11-NEXT: s_waitcnt vmcnt(0) 3954; GFX11-NEXT: s_setpc_b64 s[30:31] 3955; 3956; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32: 3957; GFX10-SCRATCH: ; %bb.0: 3958; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3959; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 3960; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3961; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 3962; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3963; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3964; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off 3965; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 3966; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 3967; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 3968; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 3969; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 3970; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 3971; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 3972; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 3973; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 3974; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 3975; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 3976; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 3977; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 3978; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 3979; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 3980; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 3981; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 3982; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 3983; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 3984 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef 3985 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val) 3986 ret void 3987} 3988 
3989define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { 3990; GFX9-LABEL: test_call_external_void_func_v2i32_imm: 3991; GFX9: ; %bb.0: 3992; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3993; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 3994; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 3995; GFX9-NEXT: s_mov_b64 exec, s[34:35] 3996; GFX9-NEXT: v_writelane_b32 v40, s33, 2 3997; GFX9-NEXT: s_mov_b32 s33, s32 3998; GFX9-NEXT: s_addk_i32 s32, 0x400 3999; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4000; GFX9-NEXT: v_mov_b32_e32 v0, 1 4001; GFX9-NEXT: v_mov_b32_e32 v1, 2 4002; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4003; GFX9-NEXT: s_getpc_b64 s[34:35] 4004; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 4005; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 4006; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4007; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4008; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4009; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4010; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4011; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4012; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4013; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4014; GFX9-NEXT: s_waitcnt vmcnt(0) 4015; GFX9-NEXT: s_setpc_b64 s[30:31] 4016; 4017; GFX10-LABEL: test_call_external_void_func_v2i32_imm: 4018; GFX10: ; %bb.0: 4019; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4020; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4021; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4022; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4023; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4024; GFX10-NEXT: s_mov_b32 exec_lo, s34 4025; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4026; GFX10-NEXT: v_mov_b32_e32 v0, 1 4027; GFX10-NEXT: v_mov_b32_e32 v1, 2 4028; GFX10-NEXT: s_mov_b32 s33, s32 4029; GFX10-NEXT: s_addk_i32 s32, 0x200 4030; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4031; GFX10-NEXT: s_getpc_b64 s[34:35] 
4032; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 4033; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 4034; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4035; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4036; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4037; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4038; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4039; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4040; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4041; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4042; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4043; GFX10-NEXT: s_mov_b32 exec_lo, s34 4044; GFX10-NEXT: s_waitcnt vmcnt(0) 4045; GFX10-NEXT: s_setpc_b64 s[30:31] 4046; 4047; GFX11-LABEL: test_call_external_void_func_v2i32_imm: 4048; GFX11: ; %bb.0: 4049; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4050; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4051; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4052; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4053; GFX11-NEXT: s_mov_b32 exec_lo, s0 4054; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4055; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 4056; GFX11-NEXT: s_mov_b32 s33, s32 4057; GFX11-NEXT: s_add_i32 s32, s32, 16 4058; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4059; GFX11-NEXT: s_getpc_b64 s[0:1] 4060; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 4061; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 4062; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4063; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4064; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4065; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4066; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4067; GFX11-NEXT: s_add_i32 s32, s32, -16 4068; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4069; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4070; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4071; GFX11-NEXT: s_mov_b32 exec_lo, s0 4072; GFX11-NEXT: s_waitcnt vmcnt(0) 4073; GFX11-NEXT: 
s_setpc_b64 s[30:31] 4074; 4075; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm: 4076; GFX10-SCRATCH: ; %bb.0: 4077; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4078; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4079; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4080; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4081; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4082; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4083; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4084; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 4085; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 4086; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4087; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4088; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4089; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4090; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 4091; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 4092; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4093; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4094; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4095; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4096; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4097; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4098; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4099; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4100; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4101; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4102; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4103; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4104 call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>) 4105 ret void 4106} 4107 4108define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { 4109; GFX9-LABEL: test_call_external_void_func_v3i32_imm: 4110; GFX9: ; %bb.0: 4111; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4112; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4113; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4114; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4115; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4116; GFX9-NEXT: s_mov_b32 s33, s32 4117; GFX9-NEXT: s_addk_i32 s32, 0x400 4118; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4119; GFX9-NEXT: v_mov_b32_e32 v0, 3 4120; GFX9-NEXT: v_mov_b32_e32 v1, 4 4121; GFX9-NEXT: v_mov_b32_e32 v2, 5 4122; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4123; GFX9-NEXT: s_getpc_b64 s[34:35] 4124; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 4125; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 4126; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4127; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4128; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4129; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4130; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4131; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4132; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4133; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4134; GFX9-NEXT: s_waitcnt vmcnt(0) 4135; GFX9-NEXT: s_setpc_b64 s[30:31] 4136; 4137; GFX10-LABEL: test_call_external_void_func_v3i32_imm: 4138; GFX10: ; %bb.0: 4139; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4140; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4141; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4142; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4143; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4144; GFX10-NEXT: s_mov_b32 exec_lo, s34 4145; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4146; GFX10-NEXT: v_mov_b32_e32 v0, 3 4147; GFX10-NEXT: v_mov_b32_e32 v1, 4 4148; GFX10-NEXT: v_mov_b32_e32 v2, 5 4149; GFX10-NEXT: s_mov_b32 s33, s32 4150; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4151; GFX10-NEXT: s_addk_i32 s32, 0x200 4152; GFX10-NEXT: s_getpc_b64 s[34:35] 4153; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 4154; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 4155; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4156; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4157; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4158; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4159; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4160; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4161; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4162; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4163; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4164; GFX10-NEXT: s_mov_b32 exec_lo, s34 4165; GFX10-NEXT: s_waitcnt vmcnt(0) 4166; GFX10-NEXT: s_setpc_b64 s[30:31] 4167; 4168; GFX11-LABEL: test_call_external_void_func_v3i32_imm: 4169; GFX11: ; %bb.0: 4170; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4171; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4172; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4173; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4174; GFX11-NEXT: s_mov_b32 exec_lo, s0 4175; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4176; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 4177; GFX11-NEXT: v_mov_b32_e32 v2, 5 4178; GFX11-NEXT: s_mov_b32 s33, s32 4179; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4180; GFX11-NEXT: s_add_i32 s32, s32, 16 4181; GFX11-NEXT: s_getpc_b64 s[0:1] 4182; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4 4183; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 4184; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4185; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4186; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4187; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4188; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4189; GFX11-NEXT: s_add_i32 s32, s32, -16 4190; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4191; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4192; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4193; GFX11-NEXT: s_mov_b32 exec_lo, s0 4194; GFX11-NEXT: s_waitcnt vmcnt(0) 4195; GFX11-NEXT: s_setpc_b64 s[30:31] 4196; 4197; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm: 4198; GFX10-SCRATCH: ; %bb.0: 4199; GFX10-SCRATCH-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 4200; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4201; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4202; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4203; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4204; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4205; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4206; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 4207; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 4208; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 4209; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4210; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4211; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4212; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4213; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4 4214; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 4215; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4216; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4217; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4218; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4219; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4220; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4221; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4222; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4223; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4224; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4225; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4226; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4227 call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>) 4228 ret void 4229} 4230 4231define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { 4232; GFX9-LABEL: test_call_external_void_func_v3i32_i32: 4233; GFX9: ; %bb.0: 4234; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4235; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4236; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4237; GFX9-NEXT: s_mov_b64 exec, s[34:35] 
4238; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4239; GFX9-NEXT: s_mov_b32 s33, s32 4240; GFX9-NEXT: s_addk_i32 s32, 0x400 4241; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4242; GFX9-NEXT: v_mov_b32_e32 v0, 3 4243; GFX9-NEXT: v_mov_b32_e32 v1, 4 4244; GFX9-NEXT: v_mov_b32_e32 v2, 5 4245; GFX9-NEXT: v_mov_b32_e32 v3, 6 4246; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4247; GFX9-NEXT: s_getpc_b64 s[34:35] 4248; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 4249; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 4250; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4251; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4252; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4253; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4254; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4255; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4256; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4257; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4258; GFX9-NEXT: s_waitcnt vmcnt(0) 4259; GFX9-NEXT: s_setpc_b64 s[30:31] 4260; 4261; GFX10-LABEL: test_call_external_void_func_v3i32_i32: 4262; GFX10: ; %bb.0: 4263; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4264; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4265; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4266; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4267; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4268; GFX10-NEXT: s_mov_b32 exec_lo, s34 4269; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4270; GFX10-NEXT: v_mov_b32_e32 v0, 3 4271; GFX10-NEXT: v_mov_b32_e32 v1, 4 4272; GFX10-NEXT: v_mov_b32_e32 v2, 5 4273; GFX10-NEXT: v_mov_b32_e32 v3, 6 4274; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4275; GFX10-NEXT: s_mov_b32 s33, s32 4276; GFX10-NEXT: s_addk_i32 s32, 0x200 4277; GFX10-NEXT: s_getpc_b64 s[34:35] 4278; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 4279; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 4280; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4281; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] 4282; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4283; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4284; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4285; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4286; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4287; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4288; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4289; GFX10-NEXT: s_mov_b32 exec_lo, s34 4290; GFX10-NEXT: s_waitcnt vmcnt(0) 4291; GFX10-NEXT: s_setpc_b64 s[30:31] 4292; 4293; GFX11-LABEL: test_call_external_void_func_v3i32_i32: 4294; GFX11: ; %bb.0: 4295; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4296; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4297; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4298; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4299; GFX11-NEXT: s_mov_b32 exec_lo, s0 4300; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4301; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 4302; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 4303; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4304; GFX11-NEXT: s_mov_b32 s33, s32 4305; GFX11-NEXT: s_add_i32 s32, s32, 16 4306; GFX11-NEXT: s_getpc_b64 s[0:1] 4307; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4 4308; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 4309; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4310; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4311; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4312; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4313; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4314; GFX11-NEXT: s_add_i32 s32, s32, -16 4315; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4316; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4317; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4318; GFX11-NEXT: s_mov_b32 exec_lo, s0 4319; GFX11-NEXT: s_waitcnt vmcnt(0) 4320; GFX11-NEXT: s_setpc_b64 s[30:31] 4321; 4322; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32: 4323; GFX10-SCRATCH: ; %bb.0: 4324; 
GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4325; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4326; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4327; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4328; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4329; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4330; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4331; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 4332; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 4333; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 4334; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 4335; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4336; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4337; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4338; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4339; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4 4340; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 4341; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4342; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4343; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4344; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4345; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4346; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4347; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4348; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4349; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4350; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4351; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4352; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4353 call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6) 4354 ret void 4355} 4356 4357define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { 4358; GFX9-LABEL: test_call_external_void_func_v4i32: 4359; GFX9: ; %bb.0: 4360; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4361; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4362; GFX9-NEXT: buffer_store_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Spill 4363; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4364; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4365; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4366; GFX9-NEXT: s_mov_b32 s33, s32 4367; GFX9-NEXT: s_addk_i32 s32, 0x400 4368; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4369; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4370; GFX9-NEXT: s_getpc_b64 s[34:35] 4371; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 4372; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 4373; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4374; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4375; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4376; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4377; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4378; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4379; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4380; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4381; GFX9-NEXT: s_waitcnt vmcnt(0) 4382; GFX9-NEXT: s_setpc_b64 s[30:31] 4383; 4384; GFX10-LABEL: test_call_external_void_func_v4i32: 4385; GFX10: ; %bb.0: 4386; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4387; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4388; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4389; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4390; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4391; GFX10-NEXT: s_mov_b32 exec_lo, s34 4392; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4393; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4394; GFX10-NEXT: s_mov_b32 s33, s32 4395; GFX10-NEXT: s_addk_i32 s32, 0x200 4396; GFX10-NEXT: s_getpc_b64 s[34:35] 4397; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 4398; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 4399; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4400; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4401; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4402; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4403; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4404; 
GFX10-NEXT: s_addk_i32 s32, 0xfe00 4405; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4406; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4407; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4408; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4409; GFX10-NEXT: s_mov_b32 exec_lo, s34 4410; GFX10-NEXT: s_waitcnt vmcnt(0) 4411; GFX10-NEXT: s_setpc_b64 s[30:31] 4412; 4413; GFX11-LABEL: test_call_external_void_func_v4i32: 4414; GFX11: ; %bb.0: 4415; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4416; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4417; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4418; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4419; GFX11-NEXT: s_mov_b32 exec_lo, s0 4420; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off 4421; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4422; GFX11-NEXT: s_mov_b32 s33, s32 4423; GFX11-NEXT: s_add_i32 s32, s32, 16 4424; GFX11-NEXT: s_getpc_b64 s[0:1] 4425; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 4426; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 4427; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4428; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4429; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4430; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4431; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4432; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4433; GFX11-NEXT: s_add_i32 s32, s32, -16 4434; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4435; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4436; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4437; GFX11-NEXT: s_mov_b32 exec_lo, s0 4438; GFX11-NEXT: s_waitcnt vmcnt(0) 4439; GFX11-NEXT: s_setpc_b64 s[30:31] 4440; 4441; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32: 4442; GFX10-SCRATCH: ; %bb.0: 4443; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4444; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4445; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4446; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 
4-byte Folded Spill 4447; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4448; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4449; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off 4450; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4451; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4452; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4453; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4454; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 4455; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 4456; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4457; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4458; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4459; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4460; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4461; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4462; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4463; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4464; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4465; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4466; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4467; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4468; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4469 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef 4470 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val) 4471 ret void 4472} 4473 4474define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { 4475; GFX9-LABEL: test_call_external_void_func_v4i32_imm: 4476; GFX9: ; %bb.0: 4477; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4478; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4479; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4480; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4481; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4482; GFX9-NEXT: s_mov_b32 s33, s32 4483; GFX9-NEXT: s_addk_i32 s32, 0x400 4484; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4485; GFX9-NEXT: v_mov_b32_e32 v0, 1 4486; GFX9-NEXT: v_mov_b32_e32 
v1, 2 4487; GFX9-NEXT: v_mov_b32_e32 v2, 3 4488; GFX9-NEXT: v_mov_b32_e32 v3, 4 4489; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4490; GFX9-NEXT: s_getpc_b64 s[34:35] 4491; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 4492; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 4493; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4494; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4495; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4496; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4497; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4498; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4499; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4500; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4501; GFX9-NEXT: s_waitcnt vmcnt(0) 4502; GFX9-NEXT: s_setpc_b64 s[30:31] 4503; 4504; GFX10-LABEL: test_call_external_void_func_v4i32_imm: 4505; GFX10: ; %bb.0: 4506; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4507; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4508; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4509; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4510; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4511; GFX10-NEXT: s_mov_b32 exec_lo, s34 4512; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4513; GFX10-NEXT: v_mov_b32_e32 v0, 1 4514; GFX10-NEXT: v_mov_b32_e32 v1, 2 4515; GFX10-NEXT: v_mov_b32_e32 v2, 3 4516; GFX10-NEXT: v_mov_b32_e32 v3, 4 4517; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4518; GFX10-NEXT: s_mov_b32 s33, s32 4519; GFX10-NEXT: s_addk_i32 s32, 0x200 4520; GFX10-NEXT: s_getpc_b64 s[34:35] 4521; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 4522; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 4523; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4524; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4525; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4526; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4527; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4528; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4529; GFX10-NEXT: s_or_saveexec_b32 s34, -1 
4530; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4531; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4532; GFX10-NEXT: s_mov_b32 exec_lo, s34 4533; GFX10-NEXT: s_waitcnt vmcnt(0) 4534; GFX10-NEXT: s_setpc_b64 s[30:31] 4535; 4536; GFX11-LABEL: test_call_external_void_func_v4i32_imm: 4537; GFX11: ; %bb.0: 4538; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4539; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4540; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4541; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4542; GFX11-NEXT: s_mov_b32 exec_lo, s0 4543; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4544; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 4545; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 4546; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4547; GFX11-NEXT: s_mov_b32 s33, s32 4548; GFX11-NEXT: s_add_i32 s32, s32, 16 4549; GFX11-NEXT: s_getpc_b64 s[0:1] 4550; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 4551; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 4552; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4553; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4554; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4555; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4556; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4557; GFX11-NEXT: s_add_i32 s32, s32, -16 4558; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4559; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4560; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4561; GFX11-NEXT: s_mov_b32 exec_lo, s0 4562; GFX11-NEXT: s_waitcnt vmcnt(0) 4563; GFX11-NEXT: s_setpc_b64 s[30:31] 4564; 4565; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm: 4566; GFX10-SCRATCH: ; %bb.0: 4567; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4568; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4569; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4570; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4571; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 4572; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4573; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4574; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 4575; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 4576; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 4577; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 4578; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4579; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4580; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4581; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4582; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 4583; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 4584; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4585; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4586; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4587; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4588; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4589; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4590; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4591; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4592; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4593; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4594; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4595; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4596 call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>) 4597 ret void 4598} 4599 4600define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { 4601; GFX9-LABEL: test_call_external_void_func_v5i32_imm: 4602; GFX9: ; %bb.0: 4603; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4604; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4605; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4606; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4607; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4608; GFX9-NEXT: s_mov_b32 s33, s32 4609; GFX9-NEXT: s_addk_i32 s32, 0x400 4610; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4611; GFX9-NEXT: v_mov_b32_e32 
v0, 1 4612; GFX9-NEXT: v_mov_b32_e32 v1, 2 4613; GFX9-NEXT: v_mov_b32_e32 v2, 3 4614; GFX9-NEXT: v_mov_b32_e32 v3, 4 4615; GFX9-NEXT: v_mov_b32_e32 v4, 5 4616; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4617; GFX9-NEXT: s_getpc_b64 s[34:35] 4618; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 4619; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 4620; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4621; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4622; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4623; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4624; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4625; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4626; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4627; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4628; GFX9-NEXT: s_waitcnt vmcnt(0) 4629; GFX9-NEXT: s_setpc_b64 s[30:31] 4630; 4631; GFX10-LABEL: test_call_external_void_func_v5i32_imm: 4632; GFX10: ; %bb.0: 4633; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4634; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4635; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4636; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4637; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4638; GFX10-NEXT: s_mov_b32 exec_lo, s34 4639; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4640; GFX10-NEXT: v_mov_b32_e32 v0, 1 4641; GFX10-NEXT: v_mov_b32_e32 v1, 2 4642; GFX10-NEXT: v_mov_b32_e32 v2, 3 4643; GFX10-NEXT: v_mov_b32_e32 v3, 4 4644; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4645; GFX10-NEXT: v_mov_b32_e32 v4, 5 4646; GFX10-NEXT: s_mov_b32 s33, s32 4647; GFX10-NEXT: s_addk_i32 s32, 0x200 4648; GFX10-NEXT: s_getpc_b64 s[34:35] 4649; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 4650; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 4651; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4652; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4653; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4654; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4655; GFX10-NEXT: 
s_addk_i32 s32, 0xfe00 4656; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4657; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4658; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4659; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4660; GFX10-NEXT: s_mov_b32 exec_lo, s34 4661; GFX10-NEXT: s_waitcnt vmcnt(0) 4662; GFX10-NEXT: s_setpc_b64 s[30:31] 4663; 4664; GFX11-LABEL: test_call_external_void_func_v5i32_imm: 4665; GFX11: ; %bb.0: 4666; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4667; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4668; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4669; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4670; GFX11-NEXT: s_mov_b32 exec_lo, s0 4671; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4672; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 4673; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 4674; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4675; GFX11-NEXT: v_mov_b32_e32 v4, 5 4676; GFX11-NEXT: s_mov_b32 s33, s32 4677; GFX11-NEXT: s_add_i32 s32, s32, 16 4678; GFX11-NEXT: s_getpc_b64 s[0:1] 4679; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4 4680; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 4681; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4682; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4683; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4684; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4685; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4686; GFX11-NEXT: s_add_i32 s32, s32, -16 4687; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4688; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4689; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4690; GFX11-NEXT: s_mov_b32 exec_lo, s0 4691; GFX11-NEXT: s_waitcnt vmcnt(0) 4692; GFX11-NEXT: s_setpc_b64 s[30:31] 4693; 4694; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm: 4695; GFX10-SCRATCH: ; %bb.0: 4696; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4697; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4698; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4699; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4700; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4701; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4702; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4703; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 4704; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 4705; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 4706; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 4707; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4708; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 4709; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4710; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4711; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4712; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4 4713; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 4714; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4715; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4716; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4717; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4718; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4719; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4720; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4721; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4722; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4723; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4724; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4725; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4726 call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>) 4727 ret void 4728} 4729 4730define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { 4731; GFX9-LABEL: test_call_external_void_func_v8i32: 4732; GFX9: ; %bb.0: 4733; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4734; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4735; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4736; GFX9-NEXT: s_mov_b64 exec, 
s[34:35] 4737; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 4738; GFX9-NEXT: v_mov_b32_e32 v8, 0 4739; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4740; GFX9-NEXT: s_mov_b32 s33, s32 4741; GFX9-NEXT: s_addk_i32 s32, 0x400 4742; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4743; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] 4744; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 4745; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4746; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4747; GFX9-NEXT: s_getpc_b64 s[34:35] 4748; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 4749; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 4750; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4751; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4752; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4753; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4754; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4755; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4756; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4757; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4758; GFX9-NEXT: s_waitcnt vmcnt(0) 4759; GFX9-NEXT: s_setpc_b64 s[30:31] 4760; 4761; GFX10-LABEL: test_call_external_void_func_v8i32: 4762; GFX10: ; %bb.0: 4763; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4764; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4765; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4766; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4767; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4768; GFX10-NEXT: s_mov_b32 exec_lo, s34 4769; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 4770; GFX10-NEXT: v_mov_b32_e32 v8, 0 4771; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4772; GFX10-NEXT: s_mov_b32 s33, s32 4773; GFX10-NEXT: s_addk_i32 s32, 0x200 4774; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4775; GFX10-NEXT: s_clause 0x1 4776; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] 4777; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 4778; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4779; GFX10-NEXT: 
s_getpc_b64 s[34:35] 4780; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 4781; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 4782; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4783; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4784; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4785; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4786; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4787; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4788; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4789; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4790; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4791; GFX10-NEXT: s_mov_b32 exec_lo, s34 4792; GFX10-NEXT: s_waitcnt vmcnt(0) 4793; GFX10-NEXT: s_setpc_b64 s[30:31] 4794; 4795; GFX11-LABEL: test_call_external_void_func_v8i32: 4796; GFX11: ; %bb.0: 4797; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4798; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4799; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4800; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4801; GFX11-NEXT: s_mov_b32 exec_lo, s0 4802; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 4803; GFX11-NEXT: v_mov_b32_e32 v4, 0 4804; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4805; GFX11-NEXT: s_mov_b32 s33, s32 4806; GFX11-NEXT: s_add_i32 s32, s32, 16 4807; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4808; GFX11-NEXT: s_clause 0x1 4809; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1] 4810; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16 4811; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4812; GFX11-NEXT: s_getpc_b64 s[0:1] 4813; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 4814; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 4815; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4816; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4817; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4818; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4819; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4820; GFX11-NEXT: s_add_i32 s32, s32, -16 4821; GFX11-NEXT: 
v_readlane_b32 s33, v40, 2 4822; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4823; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4824; GFX11-NEXT: s_mov_b32 exec_lo, s0 4825; GFX11-NEXT: s_waitcnt vmcnt(0) 4826; GFX11-NEXT: s_setpc_b64 s[30:31] 4827; 4828; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32: 4829; GFX10-SCRATCH: ; %bb.0: 4830; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4831; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4832; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4833; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4834; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4835; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4836; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4837; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 4838; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4839; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4840; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 4841; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 4842; GFX10-SCRATCH-NEXT: s_clause 0x1 4843; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] 4844; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 4845; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4846; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4847; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 4848; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 4849; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4850; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4851; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4852; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4853; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4854; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4855; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4856; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4857; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4858; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 
4859; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 4860; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 4861 %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef 4862 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr 4863 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val) 4864 ret void 4865} 4866 4867define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { 4868; GFX9-LABEL: test_call_external_void_func_v8i32_imm: 4869; GFX9: ; %bb.0: 4870; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4871; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4872; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4873; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4874; GFX9-NEXT: v_writelane_b32 v40, s33, 2 4875; GFX9-NEXT: s_mov_b32 s33, s32 4876; GFX9-NEXT: s_addk_i32 s32, 0x400 4877; GFX9-NEXT: v_writelane_b32 v40, s30, 0 4878; GFX9-NEXT: v_mov_b32_e32 v0, 1 4879; GFX9-NEXT: v_mov_b32_e32 v1, 2 4880; GFX9-NEXT: v_mov_b32_e32 v2, 3 4881; GFX9-NEXT: v_mov_b32_e32 v3, 4 4882; GFX9-NEXT: v_mov_b32_e32 v4, 5 4883; GFX9-NEXT: v_mov_b32_e32 v5, 6 4884; GFX9-NEXT: v_mov_b32_e32 v6, 7 4885; GFX9-NEXT: v_mov_b32_e32 v7, 8 4886; GFX9-NEXT: v_writelane_b32 v40, s31, 1 4887; GFX9-NEXT: s_getpc_b64 s[34:35] 4888; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 4889; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 4890; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 4891; GFX9-NEXT: v_readlane_b32 s31, v40, 1 4892; GFX9-NEXT: v_readlane_b32 s30, v40, 0 4893; GFX9-NEXT: s_addk_i32 s32, 0xfc00 4894; GFX9-NEXT: v_readlane_b32 s33, v40, 2 4895; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 4896; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4897; GFX9-NEXT: s_mov_b64 exec, s[34:35] 4898; GFX9-NEXT: s_waitcnt vmcnt(0) 4899; GFX9-NEXT: s_setpc_b64 s[30:31] 4900; 4901; GFX10-LABEL: test_call_external_void_func_v8i32_imm: 4902; GFX10: ; %bb.0: 4903; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4904; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4905; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4906; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 4907; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4908; GFX10-NEXT: s_mov_b32 exec_lo, s34 4909; GFX10-NEXT: v_writelane_b32 v40, s33, 2 4910; GFX10-NEXT: v_mov_b32_e32 v0, 1 4911; GFX10-NEXT: v_mov_b32_e32 v1, 2 4912; GFX10-NEXT: v_mov_b32_e32 v2, 3 4913; GFX10-NEXT: v_mov_b32_e32 v3, 4 4914; GFX10-NEXT: v_writelane_b32 v40, s30, 0 4915; GFX10-NEXT: v_mov_b32_e32 v4, 5 4916; GFX10-NEXT: v_mov_b32_e32 v5, 6 4917; GFX10-NEXT: v_mov_b32_e32 v6, 7 4918; GFX10-NEXT: v_mov_b32_e32 v7, 8 4919; GFX10-NEXT: s_mov_b32 s33, s32 4920; GFX10-NEXT: s_addk_i32 s32, 0x200 4921; GFX10-NEXT: v_writelane_b32 v40, s31, 1 4922; GFX10-NEXT: s_getpc_b64 s[34:35] 4923; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 4924; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 4925; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 4926; GFX10-NEXT: v_readlane_b32 s31, v40, 1 4927; GFX10-NEXT: v_readlane_b32 s30, v40, 0 4928; GFX10-NEXT: s_addk_i32 s32, 0xfe00 4929; GFX10-NEXT: v_readlane_b32 s33, v40, 2 4930; GFX10-NEXT: s_or_saveexec_b32 s34, -1 4931; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 4932; GFX10-NEXT: s_waitcnt_depctr 0xffe3 4933; GFX10-NEXT: s_mov_b32 exec_lo, s34 4934; GFX10-NEXT: s_waitcnt vmcnt(0) 4935; GFX10-NEXT: s_setpc_b64 s[30:31] 4936; 4937; GFX11-LABEL: test_call_external_void_func_v8i32_imm: 4938; GFX11: ; %bb.0: 4939; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4940; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4941; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4942; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 4943; GFX11-NEXT: s_mov_b32 exec_lo, s0 4944; GFX11-NEXT: v_writelane_b32 v40, s33, 2 4945; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 4946; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 
4947; GFX11-NEXT: v_writelane_b32 v40, s30, 0 4948; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 4949; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 4950; GFX11-NEXT: s_mov_b32 s33, s32 4951; GFX11-NEXT: s_add_i32 s32, s32, 16 4952; GFX11-NEXT: v_writelane_b32 v40, s31, 1 4953; GFX11-NEXT: s_getpc_b64 s[0:1] 4954; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 4955; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 4956; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4957; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 4958; GFX11-NEXT: v_readlane_b32 s31, v40, 1 4959; GFX11-NEXT: v_readlane_b32 s30, v40, 0 4960; GFX11-NEXT: s_add_i32 s32, s32, -16 4961; GFX11-NEXT: v_readlane_b32 s33, v40, 2 4962; GFX11-NEXT: s_or_saveexec_b32 s0, -1 4963; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 4964; GFX11-NEXT: s_mov_b32 exec_lo, s0 4965; GFX11-NEXT: s_waitcnt vmcnt(0) 4966; GFX11-NEXT: s_setpc_b64 s[30:31] 4967; 4968; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm: 4969; GFX10-SCRATCH: ; %bb.0: 4970; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4971; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 4972; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4973; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 4974; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 4975; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 4976; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 4977; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 4978; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 4979; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 4980; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 4981; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 4982; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 4983; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6 4984; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7 4985; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8 4986; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 4987; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, 16 4988; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 4989; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 4990; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 4991; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 4992; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 4993; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 4994; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 4995; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 4996; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 4997; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 4998; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 4999; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5000; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5001; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5002; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5003 call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>) 5004 ret void 5005} 5006 5007define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { 5008; GFX9-LABEL: test_call_external_void_func_v16i32: 5009; GFX9: ; %bb.0: 5010; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5011; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5012; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5013; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5014; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5015; GFX9-NEXT: v_mov_b32_e32 v16, 0 5016; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5017; GFX9-NEXT: s_mov_b32 s33, s32 5018; GFX9-NEXT: s_addk_i32 s32, 0x400 5019; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5020; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] 5021; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 5022; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 5023; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 5024; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5025; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5026; 
GFX9-NEXT: s_getpc_b64 s[34:35] 5027; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 5028; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 5029; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5030; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5031; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5032; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5033; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5034; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5035; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5036; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5037; GFX9-NEXT: s_waitcnt vmcnt(0) 5038; GFX9-NEXT: s_setpc_b64 s[30:31] 5039; 5040; GFX10-LABEL: test_call_external_void_func_v16i32: 5041; GFX10: ; %bb.0: 5042; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5043; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5044; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5045; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5046; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5047; GFX10-NEXT: s_mov_b32 exec_lo, s34 5048; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5049; GFX10-NEXT: v_mov_b32_e32 v16, 0 5050; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5051; GFX10-NEXT: s_mov_b32 s33, s32 5052; GFX10-NEXT: s_addk_i32 s32, 0x200 5053; GFX10-NEXT: s_waitcnt lgkmcnt(0) 5054; GFX10-NEXT: s_clause 0x3 5055; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] 5056; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 5057; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 5058; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 5059; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5060; GFX10-NEXT: s_getpc_b64 s[34:35] 5061; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 5062; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 5063; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5064; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5065; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5066; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 5067; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5068; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5069; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5070; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5071; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5072; GFX10-NEXT: s_mov_b32 exec_lo, s34 5073; GFX10-NEXT: s_waitcnt vmcnt(0) 5074; GFX10-NEXT: s_setpc_b64 s[30:31] 5075; 5076; GFX11-LABEL: test_call_external_void_func_v16i32: 5077; GFX11: ; %bb.0: 5078; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5079; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5080; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5081; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 5082; GFX11-NEXT: s_mov_b32 exec_lo, s0 5083; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5084; GFX11-NEXT: v_mov_b32_e32 v12, 0 5085; GFX11-NEXT: v_writelane_b32 v40, s33, 2 5086; GFX11-NEXT: s_mov_b32 s33, s32 5087; GFX11-NEXT: s_add_i32 s32, s32, 16 5088; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5089; GFX11-NEXT: s_clause 0x3 5090; GFX11-NEXT: global_load_b128 v[0:3], v12, s[0:1] 5091; GFX11-NEXT: global_load_b128 v[4:7], v12, s[0:1] offset:16 5092; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32 5093; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48 5094; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5095; GFX11-NEXT: s_getpc_b64 s[0:1] 5096; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4 5097; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 5098; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5099; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5100; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5101; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5102; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5103; GFX11-NEXT: s_add_i32 s32, s32, -16 5104; GFX11-NEXT: v_readlane_b32 s33, v40, 2 5105; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5106; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 5107; GFX11-NEXT: s_mov_b32 exec_lo, s0 5108; GFX11-NEXT: 
s_waitcnt vmcnt(0) 5109; GFX11-NEXT: s_setpc_b64 s[30:31] 5110; 5111; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32: 5112; GFX10-SCRATCH: ; %bb.0: 5113; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5114; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5115; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5116; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 5117; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5118; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5119; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5120; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 5121; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5122; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5123; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5124; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 5125; GFX10-SCRATCH-NEXT: s_clause 0x3 5126; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] 5127; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 5128; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 5129; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 5130; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5131; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5132; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4 5133; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 5134; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5135; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5136; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5137; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5138; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5139; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5140; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5141; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 5142; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5143; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5144; GFX10-SCRATCH-NEXT: s_waitcnt 
vmcnt(0) 5145; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5146 %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef 5147 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr 5148 call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val) 5149 ret void 5150} 5151 5152define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { 5153; GFX9-LABEL: test_call_external_void_func_v32i32: 5154; GFX9: ; %bb.0: 5155; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5156; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5157; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5158; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5159; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5160; GFX9-NEXT: v_mov_b32_e32 v28, 0 5161; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5162; GFX9-NEXT: s_mov_b32 s33, s32 5163; GFX9-NEXT: s_addk_i32 s32, 0x400 5164; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5165; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] 5166; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 5167; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 5168; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 5169; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 5170; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 5171; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 5172; GFX9-NEXT: s_nop 0 5173; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 5174; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5175; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5176; GFX9-NEXT: s_getpc_b64 s[34:35] 5177; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 5178; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 5179; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5180; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5181; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5182; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5183; GFX9-NEXT: v_readlane_b32 s33, v40, 
2 5184; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5185; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5186; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5187; GFX9-NEXT: s_waitcnt vmcnt(0) 5188; GFX9-NEXT: s_setpc_b64 s[30:31] 5189; 5190; GFX10-LABEL: test_call_external_void_func_v32i32: 5191; GFX10: ; %bb.0: 5192; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5193; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5194; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5195; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5196; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5197; GFX10-NEXT: s_mov_b32 exec_lo, s34 5198; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5199; GFX10-NEXT: v_mov_b32_e32 v32, 0 5200; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5201; GFX10-NEXT: s_mov_b32 s33, s32 5202; GFX10-NEXT: s_addk_i32 s32, 0x200 5203; GFX10-NEXT: s_waitcnt lgkmcnt(0) 5204; GFX10-NEXT: s_clause 0x7 5205; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] 5206; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 5207; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 5208; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 5209; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 5210; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 5211; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 5212; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 5213; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5214; GFX10-NEXT: s_getpc_b64 s[34:35] 5215; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 5216; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 5217; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5218; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5219; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5220; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5221; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5222; GFX10-NEXT: v_readlane_b32 s33, v40, 2 
5223; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5224; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5225; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5226; GFX10-NEXT: s_mov_b32 exec_lo, s34 5227; GFX10-NEXT: s_waitcnt vmcnt(0) 5228; GFX10-NEXT: s_setpc_b64 s[30:31] 5229; 5230; GFX11-LABEL: test_call_external_void_func_v32i32: 5231; GFX11: ; %bb.0: 5232; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5233; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5234; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5235; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 5236; GFX11-NEXT: s_mov_b32 exec_lo, s0 5237; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5238; GFX11-NEXT: v_mov_b32_e32 v28, 0 5239; GFX11-NEXT: v_writelane_b32 v40, s33, 2 5240; GFX11-NEXT: s_mov_b32 s33, s32 5241; GFX11-NEXT: s_add_i32 s32, s32, 16 5242; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5243; GFX11-NEXT: s_clause 0x7 5244; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1] 5245; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16 5246; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32 5247; GFX11-NEXT: global_load_b128 v[12:15], v28, s[0:1] offset:48 5248; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64 5249; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80 5250; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 5251; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 5252; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5253; GFX11-NEXT: s_getpc_b64 s[0:1] 5254; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4 5255; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 5256; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5257; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5258; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5259; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5260; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5261; GFX11-NEXT: s_add_i32 s32, s32, -16 5262; GFX11-NEXT: v_readlane_b32 s33, v40, 2 5263; GFX11-NEXT: 
s_or_saveexec_b32 s0, -1 5264; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 5265; GFX11-NEXT: s_mov_b32 exec_lo, s0 5266; GFX11-NEXT: s_waitcnt vmcnt(0) 5267; GFX11-NEXT: s_setpc_b64 s[30:31] 5268; 5269; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32: 5270; GFX10-SCRATCH: ; %bb.0: 5271; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5272; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5273; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5274; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 5275; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5276; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5277; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5278; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 5279; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5280; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5281; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5282; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 5283; GFX10-SCRATCH-NEXT: s_clause 0x7 5284; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] 5285; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16 5286; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32 5287; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48 5288; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64 5289; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 5290; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 5291; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 5292; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5293; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5294; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4 5295; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 5296; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5297; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5298; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5299; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5300; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5301; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5302; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5303; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 5304; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5305; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5306; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5307; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5308 %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef 5309 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr 5310 call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val) 5311 ret void 5312} 5313 5314define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { 5315; GFX9-LABEL: test_call_external_void_func_v32i32_i32: 5316; GFX9: ; %bb.0: 5317; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5318; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5319; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5320; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5321; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5322; GFX9-NEXT: v_mov_b32_e32 v28, 0 5323; GFX9-NEXT: global_load_dword v32, v[0:1], off 5324; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5325; GFX9-NEXT: s_mov_b32 s33, s32 5326; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5327; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] 5328; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 5329; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 5330; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 5331; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 5332; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 5333; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 5334; GFX9-NEXT: s_nop 0 5335; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 
5336; GFX9-NEXT: s_addk_i32 s32, 0x400 5337; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5338; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5339; GFX9-NEXT: s_getpc_b64 s[34:35] 5340; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 5341; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 5342; GFX9-NEXT: s_waitcnt vmcnt(8) 5343; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 5344; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5345; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5346; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5347; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5348; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5349; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5350; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5351; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5352; GFX9-NEXT: s_waitcnt vmcnt(0) 5353; GFX9-NEXT: s_setpc_b64 s[30:31] 5354; 5355; GFX10-LABEL: test_call_external_void_func_v32i32_i32: 5356; GFX10: ; %bb.0: 5357; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5358; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5359; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5360; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5361; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5362; GFX10-NEXT: s_mov_b32 exec_lo, s34 5363; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5364; GFX10-NEXT: v_mov_b32_e32 v32, 0 5365; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5366; GFX10-NEXT: s_mov_b32 s33, s32 5367; GFX10-NEXT: s_addk_i32 s32, 0x200 5368; GFX10-NEXT: global_load_dword v33, v[0:1], off 5369; GFX10-NEXT: s_waitcnt lgkmcnt(0) 5370; GFX10-NEXT: s_clause 0x7 5371; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] 5372; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 5373; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 5374; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 5375; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 5376; GFX10-NEXT: 
global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 5377; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 5378; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 5379; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5380; GFX10-NEXT: s_getpc_b64 s[34:35] 5381; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 5382; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 5383; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5384; GFX10-NEXT: s_waitcnt vmcnt(8) 5385; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 5386; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5387; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5388; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5389; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5390; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5391; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5392; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5393; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5394; GFX10-NEXT: s_mov_b32 exec_lo, s34 5395; GFX10-NEXT: s_waitcnt vmcnt(0) 5396; GFX10-NEXT: s_setpc_b64 s[30:31] 5397; 5398; GFX11-LABEL: test_call_external_void_func_v32i32_i32: 5399; GFX11: ; %bb.0: 5400; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5401; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5402; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5403; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 5404; GFX11-NEXT: s_mov_b32 exec_lo, s0 5405; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5406; GFX11-NEXT: v_mov_b32_e32 v28, 0 5407; GFX11-NEXT: v_writelane_b32 v40, s33, 2 5408; GFX11-NEXT: s_mov_b32 s33, s32 5409; GFX11-NEXT: s_add_i32 s32, s32, 16 5410; GFX11-NEXT: global_load_b32 v32, v[0:1], off 5411; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5412; GFX11-NEXT: s_clause 0x7 5413; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1] 5414; GFX11-NEXT: global_load_b128 v[4:7], v28, s[0:1] offset:16 5415; GFX11-NEXT: global_load_b128 v[8:11], v28, s[0:1] offset:32 5416; GFX11-NEXT: global_load_b128 
v[12:15], v28, s[0:1] offset:48 5417; GFX11-NEXT: global_load_b128 v[16:19], v28, s[0:1] offset:64 5418; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80 5419; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 5420; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 5421; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5422; GFX11-NEXT: s_getpc_b64 s[0:1] 5423; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4 5424; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12 5425; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5426; GFX11-NEXT: s_waitcnt vmcnt(8) 5427; GFX11-NEXT: scratch_store_b32 off, v32, s32 5428; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5429; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5430; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5431; GFX11-NEXT: s_add_i32 s32, s32, -16 5432; GFX11-NEXT: v_readlane_b32 s33, v40, 2 5433; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5434; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 5435; GFX11-NEXT: s_mov_b32 exec_lo, s0 5436; GFX11-NEXT: s_waitcnt vmcnt(0) 5437; GFX11-NEXT: s_setpc_b64 s[30:31] 5438; 5439; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32: 5440; GFX10-SCRATCH: ; %bb.0: 5441; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5442; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5443; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5444; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 5445; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5446; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5447; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5448; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 5449; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5450; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5451; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5452; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off 5453; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 5454; GFX10-SCRATCH-NEXT: s_clause 0x7 5455; 
GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] 5456; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] offset:16 5457; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:32 5458; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:48 5459; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:64 5460; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 5461; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 5462; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 5463; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5464; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5465; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4 5466; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12 5467; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5468; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) 5469; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 5470; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5471; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5472; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5473; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5474; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5475; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5476; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 5477; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5478; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5479; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5480; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5481 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef 5482 %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 5483 %val1 = load i32, i32 addrspace(1)* undef 5484 call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) 5485 ret void 5486} 5487 5488define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 
addrspace(1)* %out) #0 { 5489; GFX9-LABEL: test_call_external_i32_func_i32_imm: 5490; GFX9: ; %bb.0: 5491; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5492; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5493; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 5494; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5495; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5496; GFX9-NEXT: s_mov_b32 s33, s32 5497; GFX9-NEXT: s_addk_i32 s32, 0x400 5498; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 5499; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill 5500; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5501; GFX9-NEXT: v_mov_b32_e32 v41, v0 5502; GFX9-NEXT: v_mov_b32_e32 v0, 42 5503; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5504; GFX9-NEXT: v_mov_b32_e32 v42, v1 5505; GFX9-NEXT: s_getpc_b64 s[34:35] 5506; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 5507; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 5508; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5509; GFX9-NEXT: global_store_dword v[41:42], v0, off 5510; GFX9-NEXT: s_waitcnt vmcnt(0) 5511; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload 5512; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload 5513; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5514; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5515; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5516; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5517; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5518; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 5519; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5520; GFX9-NEXT: s_waitcnt vmcnt(0) 5521; GFX9-NEXT: s_setpc_b64 s[30:31] 5522; 5523; GFX10-LABEL: test_call_external_i32_func_i32_imm: 5524; GFX10: ; %bb.0: 5525; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5526; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5527; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5528; GFX10-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 5529; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5530; GFX10-NEXT: s_mov_b32 exec_lo, s34 5531; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5532; GFX10-NEXT: s_mov_b32 s33, s32 5533; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill 5534; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill 5535; GFX10-NEXT: v_mov_b32_e32 v41, v0 5536; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5537; GFX10-NEXT: v_mov_b32_e32 v0, 42 5538; GFX10-NEXT: s_addk_i32 s32, 0x200 5539; GFX10-NEXT: v_mov_b32_e32 v42, v1 5540; GFX10-NEXT: s_getpc_b64 s[34:35] 5541; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 5542; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 5543; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5544; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5545; GFX10-NEXT: global_store_dword v[41:42], v0, off 5546; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5547; GFX10-NEXT: s_clause 0x1 5548; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 5549; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 5550; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5551; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5552; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5553; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5554; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5555; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 5556; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5557; GFX10-NEXT: s_mov_b32 exec_lo, s34 5558; GFX10-NEXT: s_waitcnt vmcnt(0) 5559; GFX10-NEXT: s_setpc_b64 s[30:31] 5560; 5561; GFX11-LABEL: test_call_external_i32_func_i32_imm: 5562; GFX11: ; %bb.0: 5563; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5564; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5565; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5566; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill 5567; GFX11-NEXT: s_mov_b32 exec_lo, s0 5568; GFX11-NEXT: 
v_writelane_b32 v40, s33, 2 5569; GFX11-NEXT: s_mov_b32 s33, s32 5570; GFX11-NEXT: s_clause 0x1 5571; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 5572; GFX11-NEXT: scratch_store_b32 off, v42, s33 5573; GFX11-NEXT: v_dual_mov_b32 v42, v1 :: v_dual_mov_b32 v41, v0 5574; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5575; GFX11-NEXT: v_mov_b32_e32 v0, 42 5576; GFX11-NEXT: s_add_i32 s32, s32, 16 5577; GFX11-NEXT: s_getpc_b64 s[0:1] 5578; GFX11-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4 5579; GFX11-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12 5580; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5581; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5582; GFX11-NEXT: global_store_b32 v[41:42], v0, off dlc 5583; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5584; GFX11-NEXT: s_clause 0x1 5585; GFX11-NEXT: scratch_load_b32 v42, off, s33 5586; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 5587; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5588; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5589; GFX11-NEXT: s_add_i32 s32, s32, -16 5590; GFX11-NEXT: v_readlane_b32 s33, v40, 2 5591; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5592; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload 5593; GFX11-NEXT: s_mov_b32 exec_lo, s0 5594; GFX11-NEXT: s_waitcnt vmcnt(0) 5595; GFX11-NEXT: s_setpc_b64 s[30:31] 5596; 5597; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm: 5598; GFX10-SCRATCH: ; %bb.0: 5599; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5600; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5601; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5602; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill 5603; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5604; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5605; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5606; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5607; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill 5608; GFX10-SCRATCH-NEXT: 
scratch_store_dword off, v42, s33 ; 4-byte Folded Spill 5609; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v0 5610; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5611; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 5612; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5613; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, v1 5614; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5615; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4 5616; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12 5617; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5618; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5619; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off 5620; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5621; GFX10-SCRATCH-NEXT: s_clause 0x1 5622; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 5623; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4 5624; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5625; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5626; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5627; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5628; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5629; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload 5630; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5631; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5632; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5633; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5634 %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42) 5635 store volatile i32 %val, i32 addrspace(1)* %out 5636 ret void 5637} 5638 5639define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { 5640; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: 5641; GFX9: ; %bb.0: 5642; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5643; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5644; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5645; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5646; GFX9-NEXT: s_load_dwordx2 
s[34:35], s[34:35], 0x0 5647; GFX9-NEXT: v_mov_b32_e32 v2, 0 5648; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5649; GFX9-NEXT: s_mov_b32 s33, s32 5650; GFX9-NEXT: s_addk_i32 s32, 0x400 5651; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5652; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] 5653; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 5654; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5655; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5656; GFX9-NEXT: s_getpc_b64 s[34:35] 5657; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 5658; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 5659; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5660; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5661; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5662; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5663; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5664; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5665; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5666; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5667; GFX9-NEXT: s_waitcnt vmcnt(0) 5668; GFX9-NEXT: s_setpc_b64 s[30:31] 5669; 5670; GFX10-LABEL: test_call_external_void_func_struct_i8_i32: 5671; GFX10: ; %bb.0: 5672; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5673; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5674; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5675; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 5676; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5677; GFX10-NEXT: s_mov_b32 exec_lo, s34 5678; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 5679; GFX10-NEXT: v_mov_b32_e32 v2, 0 5680; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5681; GFX10-NEXT: s_mov_b32 s33, s32 5682; GFX10-NEXT: s_addk_i32 s32, 0x200 5683; GFX10-NEXT: s_waitcnt lgkmcnt(0) 5684; GFX10-NEXT: s_clause 0x1 5685; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] 5686; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 5687; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5688; GFX10-NEXT: s_getpc_b64 s[34:35] 5689; GFX10-NEXT: 
s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 5690; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 5691; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5692; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5693; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5694; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5695; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5696; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5697; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5698; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 5699; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5700; GFX10-NEXT: s_mov_b32 exec_lo, s34 5701; GFX10-NEXT: s_waitcnt vmcnt(0) 5702; GFX10-NEXT: s_setpc_b64 s[30:31] 5703; 5704; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: 5705; GFX11: ; %bb.0: 5706; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5707; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5708; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5709; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 5710; GFX11-NEXT: s_mov_b32 exec_lo, s0 5711; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 5712; GFX11-NEXT: v_mov_b32_e32 v1, 0 5713; GFX11-NEXT: v_writelane_b32 v40, s33, 2 5714; GFX11-NEXT: s_mov_b32 s33, s32 5715; GFX11-NEXT: s_add_i32 s32, s32, 16 5716; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5717; GFX11-NEXT: s_clause 0x1 5718; GFX11-NEXT: global_load_u8 v0, v1, s[0:1] 5719; GFX11-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 5720; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5721; GFX11-NEXT: s_getpc_b64 s[0:1] 5722; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4 5723; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 5724; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5725; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5726; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5727; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5728; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5729; GFX11-NEXT: s_add_i32 s32, s32, -16 5730; GFX11-NEXT: v_readlane_b32 s33, 
v40, 2 5731; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5732; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 5733; GFX11-NEXT: s_mov_b32 exec_lo, s0 5734; GFX11-NEXT: s_waitcnt vmcnt(0) 5735; GFX11-NEXT: s_setpc_b64 s[30:31] 5736; 5737; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32: 5738; GFX10-SCRATCH: ; %bb.0: 5739; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5740; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5741; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5742; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 5743; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5744; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5745; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5746; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 5747; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5748; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5749; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5750; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 5751; GFX10-SCRATCH-NEXT: s_clause 0x1 5752; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1] 5753; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4 5754; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5755; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5756; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4 5757; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 5758; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5759; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5760; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5761; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5762; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5763; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5764; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5765; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 5766; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5767; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5768; 
GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5769; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5770 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef 5771 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 5772 call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val) 5773 ret void 5774} 5775 5776define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { 5777; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: 5778; GFX9: ; %bb.0: 5779; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5780; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5781; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 5782; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5783; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5784; GFX9-NEXT: s_mov_b32 s33, s32 5785; GFX9-NEXT: v_mov_b32_e32 v0, 3 5786; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 5787; GFX9-NEXT: v_mov_b32_e32 v0, 8 5788; GFX9-NEXT: s_addk_i32 s32, 0x400 5789; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5790; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 5791; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 5792; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5793; GFX9-NEXT: s_getpc_b64 s[34:35] 5794; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 5795; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 5796; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5797; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5798; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5799; GFX9-NEXT: s_addk_i32 s32, 0xfc00 5800; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5801; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5802; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 5803; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5804; GFX9-NEXT: s_waitcnt vmcnt(0) 5805; GFX9-NEXT: s_setpc_b64 s[30:31] 5806; 5807; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32: 5808; GFX10: ; %bb.0: 5809; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5810; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5811; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5812; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 5813; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5814; GFX10-NEXT: s_mov_b32 exec_lo, s34 5815; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5816; GFX10-NEXT: v_mov_b32_e32 v0, 3 5817; GFX10-NEXT: v_mov_b32_e32 v1, 8 5818; GFX10-NEXT: s_mov_b32 s33, s32 5819; GFX10-NEXT: s_addk_i32 s32, 0x200 5820; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5821; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 5822; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 5823; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 5824; GFX10-NEXT: s_getpc_b64 s[34:35] 5825; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 5826; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 5827; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5828; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5829; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5830; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5831; GFX10-NEXT: s_addk_i32 s32, 0xfe00 5832; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5833; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5834; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 5835; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5836; GFX10-NEXT: s_mov_b32 exec_lo, s34 5837; GFX10-NEXT: s_waitcnt vmcnt(0) 5838; GFX10-NEXT: s_setpc_b64 s[30:31] 5839; 5840; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32: 5841; GFX11: ; %bb.0: 5842; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5843; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5844; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5845; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill 5846; GFX11-NEXT: s_mov_b32 exec_lo, s0 5847; GFX11-NEXT: v_writelane_b32 v40, s33, 2 5848; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 5849; 
GFX11-NEXT: s_mov_b32 s33, s32 5850; GFX11-NEXT: s_add_i32 s32, s32, 16 5851; GFX11-NEXT: v_writelane_b32 v40, s30, 0 5852; GFX11-NEXT: s_clause 0x1 5853; GFX11-NEXT: scratch_store_b8 off, v0, s33 5854; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4 5855; GFX11-NEXT: v_mov_b32_e32 v0, s33 5856; GFX11-NEXT: s_getpc_b64 s[0:1] 5857; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4 5858; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 5859; GFX11-NEXT: v_writelane_b32 v40, s31, 1 5860; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 5861; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5862; GFX11-NEXT: v_readlane_b32 s31, v40, 1 5863; GFX11-NEXT: v_readlane_b32 s30, v40, 0 5864; GFX11-NEXT: s_add_i32 s32, s32, -16 5865; GFX11-NEXT: v_readlane_b32 s33, v40, 2 5866; GFX11-NEXT: s_or_saveexec_b32 s0, -1 5867; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload 5868; GFX11-NEXT: s_mov_b32 exec_lo, s0 5869; GFX11-NEXT: s_waitcnt vmcnt(0) 5870; GFX11-NEXT: s_setpc_b64 s[30:31] 5871; 5872; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32: 5873; GFX10-SCRATCH: ; %bb.0: 5874; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5875; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 5876; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5877; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill 5878; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5879; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5880; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 5881; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 5882; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 5883; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 5884; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 5885; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 5886; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 5887; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 5888; GFX10-SCRATCH-NEXT: v_mov_b32_e32 
v0, s33 5889; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 5890; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4 5891; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 5892; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 5893; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 5894; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 5895; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 5896; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 5897; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 5898; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 5899; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload 5900; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 5901; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 5902; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 5903; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 5904 %val = alloca { i8, i32 }, align 4, addrspace(5) 5905 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 5906 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 5907 store i8 3, i8 addrspace(5)* %gep0 5908 store i32 8, i32 addrspace(5)* %gep1 5909 call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val) 5910 ret void 5911} 5912 5913define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { 5914; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5915; GFX9: ; %bb.0: 5916; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5917; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5918; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 5919; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5920; GFX9-NEXT: v_writelane_b32 v40, s33, 2 5921; GFX9-NEXT: s_mov_b32 s33, s32 5922; GFX9-NEXT: v_mov_b32_e32 v0, 3 5923; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 5924; 
GFX9-NEXT: v_mov_b32_e32 v0, 8 5925; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 5926; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 5927; GFX9-NEXT: s_addk_i32 s32, 0x800 5928; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5929; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 5930; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 5931; GFX9-NEXT: v_writelane_b32 v40, s31, 1 5932; GFX9-NEXT: s_getpc_b64 s[34:35] 5933; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5934; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5935; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 5936; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 5937; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 5938; GFX9-NEXT: v_readlane_b32 s31, v40, 1 5939; GFX9-NEXT: v_readlane_b32 s30, v40, 0 5940; GFX9-NEXT: s_addk_i32 s32, 0xf800 5941; GFX9-NEXT: v_readlane_b32 s33, v40, 2 5942; GFX9-NEXT: s_waitcnt vmcnt(0) 5943; GFX9-NEXT: global_store_byte v[0:1], v0, off 5944; GFX9-NEXT: s_waitcnt vmcnt(0) 5945; GFX9-NEXT: global_store_dword v[0:1], v1, off 5946; GFX9-NEXT: s_waitcnt vmcnt(0) 5947; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 5948; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 5949; GFX9-NEXT: s_mov_b64 exec, s[34:35] 5950; GFX9-NEXT: s_waitcnt vmcnt(0) 5951; GFX9-NEXT: s_setpc_b64 s[30:31] 5952; 5953; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5954; GFX10: ; %bb.0: 5955; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5956; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5957; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5958; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 5959; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5960; GFX10-NEXT: s_mov_b32 exec_lo, s34 5961; GFX10-NEXT: v_mov_b32_e32 v0, 3 5962; GFX10-NEXT: v_mov_b32_e32 v1, 8 5963; GFX10-NEXT: v_writelane_b32 v40, s33, 2 5964; 
GFX10-NEXT: s_mov_b32 s33, s32 5965; GFX10-NEXT: s_addk_i32 s32, 0x400 5966; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 5967; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 5968; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 5969; GFX10-NEXT: v_writelane_b32 v40, s30, 0 5970; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 5971; GFX10-NEXT: s_getpc_b64 s[34:35] 5972; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5973; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5974; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 5975; GFX10-NEXT: v_writelane_b32 v40, s31, 1 5976; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 5977; GFX10-NEXT: s_clause 0x1 5978; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 5979; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 5980; GFX10-NEXT: v_readlane_b32 s31, v40, 1 5981; GFX10-NEXT: v_readlane_b32 s30, v40, 0 5982; GFX10-NEXT: s_addk_i32 s32, 0xfc00 5983; GFX10-NEXT: v_readlane_b32 s33, v40, 2 5984; GFX10-NEXT: s_waitcnt vmcnt(0) 5985; GFX10-NEXT: global_store_byte v[0:1], v0, off 5986; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5987; GFX10-NEXT: global_store_dword v[0:1], v1, off 5988; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5989; GFX10-NEXT: s_or_saveexec_b32 s34, -1 5990; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 5991; GFX10-NEXT: s_waitcnt_depctr 0xffe3 5992; GFX10-NEXT: s_mov_b32 exec_lo, s34 5993; GFX10-NEXT: s_waitcnt vmcnt(0) 5994; GFX10-NEXT: s_setpc_b64 s[30:31] 5995; 5996; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5997; GFX11: ; %bb.0: 5998; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5999; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6000; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6001; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:16 ; 4-byte Folded Spill 6002; GFX11-NEXT: s_mov_b32 exec_lo, s0 6003; GFX11-NEXT: 
v_writelane_b32 v40, s33, 2 6004; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 6005; GFX11-NEXT: s_mov_b32 s33, s32 6006; GFX11-NEXT: s_add_i32 s32, s32, 32 6007; GFX11-NEXT: s_getpc_b64 s[0:1] 6008; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 6009; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 6010; GFX11-NEXT: s_add_i32 vcc_lo, s33, 8 6011; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6012; GFX11-NEXT: s_clause 0x1 6013; GFX11-NEXT: scratch_store_b8 off, v0, s33 6014; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4 6015; GFX11-NEXT: v_dual_mov_b32 v0, vcc_lo :: v_dual_mov_b32 v1, s33 6016; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6017; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6018; GFX11-NEXT: s_clause 0x1 6019; GFX11-NEXT: scratch_load_u8 v0, off, s33 offset:8 6020; GFX11-NEXT: scratch_load_b32 v1, off, s33 offset:12 6021; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6022; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6023; GFX11-NEXT: s_addk_i32 s32, 0xffe0 6024; GFX11-NEXT: v_readlane_b32 s33, v40, 2 6025; GFX11-NEXT: s_waitcnt vmcnt(0) 6026; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc 6027; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6028; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 6029; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6030; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6031; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:16 ; 4-byte Folded Reload 6032; GFX11-NEXT: s_mov_b32 exec_lo, s0 6033; GFX11-NEXT: s_waitcnt vmcnt(0) 6034; GFX11-NEXT: s_setpc_b64 s[30:31] 6035; 6036; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 6037; GFX10-SCRATCH: ; %bb.0: 6038; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6039; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6040; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6041; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill 
6042; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6043; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6044; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 6045; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 6046; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 6047; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 6048; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 6049; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 6050; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 6051; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 6052; GFX10-SCRATCH-NEXT: s_add_i32 vcc_lo, s33, 8 6053; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 6054; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 6055; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 6056; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, vcc_lo 6057; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33 6058; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 6059; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 6060; GFX10-SCRATCH-NEXT: s_clause 0x1 6061; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 6062; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 6063; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 6064; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 6065; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 6066; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 6067; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6068; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off 6069; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6070; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off 6071; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6072; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6073; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload 6074; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6075; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6076; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6077; 
GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 6078 %in.val = alloca { i8, i32 }, align 4, addrspace(5) 6079 %out.val = alloca { i8, i32 }, align 4, addrspace(5) 6080 %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 6081 %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1 6082 store i8 3, i8 addrspace(5)* %in.gep0 6083 store i32 8, i32 addrspace(5)* %in.gep1 6084 call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val) 6085 %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0 6086 %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1 6087 %out.val0 = load i8, i8 addrspace(5)* %out.gep0 6088 %out.val1 = load i32, i32 addrspace(5)* %out.gep1 6089 6090 store volatile i8 %out.val0, i8 addrspace(1)* undef 6091 store volatile i32 %out.val1, i32 addrspace(1)* undef 6092 ret void 6093} 6094 6095define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { 6096; GFX9-LABEL: test_call_external_void_func_v16i8: 6097; GFX9: ; %bb.0: 6098; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6099; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6100; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6101; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6102; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 6103; GFX9-NEXT: v_mov_b32_e32 v0, 0 6104; GFX9-NEXT: v_writelane_b32 v40, s33, 2 6105; GFX9-NEXT: s_mov_b32 s33, s32 6106; GFX9-NEXT: s_addk_i32 s32, 0x400 6107; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6108; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] 6109; GFX9-NEXT: v_writelane_b32 v40, s30, 0 6110; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6111; GFX9-NEXT: s_getpc_b64 s[34:35] 6112; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 6113; GFX9-NEXT: 
s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 6114; GFX9-NEXT: s_waitcnt vmcnt(0) 6115; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 6116; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6117; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0 6118; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 6119; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 6120; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 6121; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2 6122; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6123; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 6124; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3 6125; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3 6126; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3 6127; GFX9-NEXT: v_mov_b32_e32 v4, v1 6128; GFX9-NEXT: v_mov_b32_e32 v8, v2 6129; GFX9-NEXT: v_mov_b32_e32 v12, v3 6130; GFX9-NEXT: v_mov_b32_e32 v1, v16 6131; GFX9-NEXT: v_mov_b32_e32 v2, v17 6132; GFX9-NEXT: v_mov_b32_e32 v3, v18 6133; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 6134; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6135; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6136; GFX9-NEXT: s_addk_i32 s32, 0xfc00 6137; GFX9-NEXT: v_readlane_b32 s33, v40, 2 6138; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6139; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6140; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6141; GFX9-NEXT: s_waitcnt vmcnt(0) 6142; GFX9-NEXT: s_setpc_b64 s[30:31] 6143; 6144; GFX10-LABEL: test_call_external_void_func_v16i8: 6145; GFX10: ; %bb.0: 6146; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6147; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6148; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6149; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6150; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6151; GFX10-NEXT: s_mov_b32 exec_lo, s34 6152; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 6153; GFX10-NEXT: v_mov_b32_e32 v0, 0 6154; GFX10-NEXT: v_writelane_b32 v40, s33, 2 6155; GFX10-NEXT: s_mov_b32 s33, s32 6156; GFX10-NEXT: s_addk_i32 s32, 0x200 6157; GFX10-NEXT: v_writelane_b32 v40, 
s30, 0 6158; GFX10-NEXT: v_writelane_b32 v40, s31, 1 6159; GFX10-NEXT: s_waitcnt lgkmcnt(0) 6160; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] 6161; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6162; GFX10-NEXT: s_getpc_b64 s[34:35] 6163; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 6164; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 6165; GFX10-NEXT: s_waitcnt vmcnt(0) 6166; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 6167; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6168; GFX10-NEXT: v_lshrrev_b32_e32 v18, 24, v0 6169; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 6170; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v1 6171; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 6172; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2 6173; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6174; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2 6175; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v3 6176; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v3 6177; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v3 6178; GFX10-NEXT: v_mov_b32_e32 v4, v1 6179; GFX10-NEXT: v_mov_b32_e32 v8, v2 6180; GFX10-NEXT: v_mov_b32_e32 v12, v3 6181; GFX10-NEXT: v_mov_b32_e32 v1, v16 6182; GFX10-NEXT: v_mov_b32_e32 v2, v17 6183; GFX10-NEXT: v_mov_b32_e32 v3, v18 6184; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 6185; GFX10-NEXT: v_readlane_b32 s31, v40, 1 6186; GFX10-NEXT: v_readlane_b32 s30, v40, 0 6187; GFX10-NEXT: s_addk_i32 s32, 0xfe00 6188; GFX10-NEXT: v_readlane_b32 s33, v40, 2 6189; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6190; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6191; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6192; GFX10-NEXT: s_mov_b32 exec_lo, s34 6193; GFX10-NEXT: s_waitcnt vmcnt(0) 6194; GFX10-NEXT: s_setpc_b64 s[30:31] 6195; 6196; GFX11-LABEL: test_call_external_void_func_v16i8: 6197; GFX11: ; %bb.0: 6198; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6199; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6200; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6201; GFX11-NEXT: scratch_store_b32 
off, v40, s32 ; 4-byte Folded Spill 6202; GFX11-NEXT: s_mov_b32 exec_lo, s0 6203; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 6204; GFX11-NEXT: v_mov_b32_e32 v0, 0 6205; GFX11-NEXT: v_writelane_b32 v40, s33, 2 6206; GFX11-NEXT: s_mov_b32 s33, s32 6207; GFX11-NEXT: s_add_i32 s32, s32, 16 6208; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6209; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6210; GFX11-NEXT: s_waitcnt lgkmcnt(0) 6211; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1] 6212; GFX11-NEXT: s_getpc_b64 s[0:1] 6213; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4 6214; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12 6215; GFX11-NEXT: s_waitcnt vmcnt(0) 6216; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0 6217; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6218; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0 6219; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 6220; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 6221; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 6222; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 6223; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6224; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 6225; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3 6226; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3 6227; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3 6228; GFX11-NEXT: v_mov_b32_e32 v4, v1 6229; GFX11-NEXT: v_mov_b32_e32 v8, v2 6230; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18 6231; GFX11-NEXT: v_dual_mov_b32 v1, v16 :: v_dual_mov_b32 v2, v17 6232; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6233; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6234; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6235; GFX11-NEXT: s_add_i32 s32, s32, -16 6236; GFX11-NEXT: v_readlane_b32 s33, v40, 2 6237; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6238; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 6239; GFX11-NEXT: s_mov_b32 exec_lo, s0 6240; GFX11-NEXT: s_waitcnt vmcnt(0) 6241; GFX11-NEXT: s_setpc_b64 s[30:31] 6242; 6243; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8: 
6244; GFX10-SCRATCH: ; %bb.0: 6245; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6246; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6247; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6248; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 6249; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6250; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6251; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 6252; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 6253; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 6254; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 6255; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 6256; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 6257; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 6258; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 6259; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] 6260; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6261; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 6262; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4 6263; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12 6264; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6265; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v16, 8, v0 6266; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v17, 16, v0 6267; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v18, 24, v0 6268; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v5, 8, v1 6269; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v6, 16, v1 6270; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v7, 24, v1 6271; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v9, 8, v2 6272; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6273; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v11, 24, v2 6274; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v13, 8, v3 6275; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v14, 16, v3 6276; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v15, 24, v3 6277; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1 6278; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, v2 6279; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, v3 6280; GFX10-SCRATCH-NEXT: 
v_mov_b32_e32 v1, v16 6281; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 6282; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 6283; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 6284; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 6285; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 6286; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 6287; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 6288; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6289; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 6290; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6291; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6292; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6293; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 6294 %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef 6295 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 6296 call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val) 6297 ret void 6298} 6299 6300define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { 6301; GFX9-LABEL: tail_call_byval_align16: 6302; GFX9: ; %bb.0: ; %entry 6303; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6304; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 6305; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 6306; GFX9-NEXT: s_mov_b64 exec, s[4:5] 6307; GFX9-NEXT: v_writelane_b32 v40, s33, 32 6308; GFX9-NEXT: s_mov_b32 s33, s32 6309; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 6310; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 6311; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 6312; GFX9-NEXT: v_writelane_b32 v40, s30, 0 6313; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6314; GFX9-NEXT: v_writelane_b32 v40, s34, 2 6315; GFX9-NEXT: v_writelane_b32 v40, s35, 3 6316; GFX9-NEXT: v_writelane_b32 v40, s36, 4 6317; GFX9-NEXT: v_writelane_b32 v40, s37, 5 6318; GFX9-NEXT: v_writelane_b32 v40, s38, 6 6319; GFX9-NEXT: v_writelane_b32 v40, s39, 7 6320; GFX9-NEXT: v_writelane_b32 v40, s40, 
8 6321; GFX9-NEXT: v_writelane_b32 v40, s41, 9 6322; GFX9-NEXT: v_writelane_b32 v40, s42, 10 6323; GFX9-NEXT: v_writelane_b32 v40, s43, 11 6324; GFX9-NEXT: v_writelane_b32 v40, s44, 12 6325; GFX9-NEXT: v_writelane_b32 v40, s45, 13 6326; GFX9-NEXT: v_writelane_b32 v40, s46, 14 6327; GFX9-NEXT: v_writelane_b32 v40, s47, 15 6328; GFX9-NEXT: v_writelane_b32 v40, s48, 16 6329; GFX9-NEXT: v_writelane_b32 v40, s49, 17 6330; GFX9-NEXT: v_writelane_b32 v40, s50, 18 6331; GFX9-NEXT: v_writelane_b32 v40, s51, 19 6332; GFX9-NEXT: v_writelane_b32 v40, s52, 20 6333; GFX9-NEXT: v_writelane_b32 v40, s53, 21 6334; GFX9-NEXT: v_writelane_b32 v40, s54, 22 6335; GFX9-NEXT: v_writelane_b32 v40, s55, 23 6336; GFX9-NEXT: v_writelane_b32 v40, s56, 24 6337; GFX9-NEXT: v_writelane_b32 v40, s57, 25 6338; GFX9-NEXT: v_writelane_b32 v40, s58, 26 6339; GFX9-NEXT: v_writelane_b32 v40, s59, 27 6340; GFX9-NEXT: v_writelane_b32 v40, s60, 28 6341; GFX9-NEXT: v_writelane_b32 v40, s61, 29 6342; GFX9-NEXT: s_addk_i32 s32, 0x800 6343; GFX9-NEXT: v_writelane_b32 v40, s62, 30 6344; GFX9-NEXT: v_writelane_b32 v40, s63, 31 6345; GFX9-NEXT: s_getpc_b64 s[4:5] 6346; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 6347; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 6348; GFX9-NEXT: s_waitcnt vmcnt(2) 6349; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 6350; GFX9-NEXT: s_waitcnt vmcnt(2) 6351; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 6352; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 6353; GFX9-NEXT: v_readlane_b32 s63, v40, 31 6354; GFX9-NEXT: v_readlane_b32 s62, v40, 30 6355; GFX9-NEXT: v_readlane_b32 s61, v40, 29 6356; GFX9-NEXT: v_readlane_b32 s60, v40, 28 6357; GFX9-NEXT: v_readlane_b32 s59, v40, 27 6358; GFX9-NEXT: v_readlane_b32 s58, v40, 26 6359; GFX9-NEXT: v_readlane_b32 s57, v40, 25 6360; GFX9-NEXT: v_readlane_b32 s56, v40, 24 6361; GFX9-NEXT: v_readlane_b32 s55, v40, 23 6362; GFX9-NEXT: v_readlane_b32 s54, v40, 22 6363; GFX9-NEXT: v_readlane_b32 
s53, v40, 21 6364; GFX9-NEXT: v_readlane_b32 s52, v40, 20 6365; GFX9-NEXT: v_readlane_b32 s51, v40, 19 6366; GFX9-NEXT: v_readlane_b32 s50, v40, 18 6367; GFX9-NEXT: v_readlane_b32 s49, v40, 17 6368; GFX9-NEXT: v_readlane_b32 s48, v40, 16 6369; GFX9-NEXT: v_readlane_b32 s47, v40, 15 6370; GFX9-NEXT: v_readlane_b32 s46, v40, 14 6371; GFX9-NEXT: v_readlane_b32 s45, v40, 13 6372; GFX9-NEXT: v_readlane_b32 s44, v40, 12 6373; GFX9-NEXT: v_readlane_b32 s43, v40, 11 6374; GFX9-NEXT: v_readlane_b32 s42, v40, 10 6375; GFX9-NEXT: v_readlane_b32 s41, v40, 9 6376; GFX9-NEXT: v_readlane_b32 s40, v40, 8 6377; GFX9-NEXT: v_readlane_b32 s39, v40, 7 6378; GFX9-NEXT: v_readlane_b32 s38, v40, 6 6379; GFX9-NEXT: v_readlane_b32 s37, v40, 5 6380; GFX9-NEXT: v_readlane_b32 s36, v40, 4 6381; GFX9-NEXT: v_readlane_b32 s35, v40, 3 6382; GFX9-NEXT: v_readlane_b32 s34, v40, 2 6383; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6384; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6385; GFX9-NEXT: s_addk_i32 s32, 0xf800 6386; GFX9-NEXT: v_readlane_b32 s33, v40, 32 6387; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 6388; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 6389; GFX9-NEXT: s_mov_b64 exec, s[4:5] 6390; GFX9-NEXT: s_waitcnt vmcnt(0) 6391; GFX9-NEXT: s_setpc_b64 s[30:31] 6392; 6393; GFX10-LABEL: tail_call_byval_align16: 6394; GFX10: ; %bb.0: ; %entry 6395; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6396; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6397; GFX10-NEXT: s_or_saveexec_b32 s4, -1 6398; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 6399; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6400; GFX10-NEXT: s_mov_b32 exec_lo, s4 6401; GFX10-NEXT: s_mov_b32 s6, s33 6402; GFX10-NEXT: s_mov_b32 s33, s32 6403; GFX10-NEXT: s_clause 0x2 6404; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 6405; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 6406; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 6407; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 6408; GFX10-NEXT: s_addk_i32 s32, 0x400 6409; GFX10-NEXT: s_getpc_b64 s[4:5] 6410; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 6411; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 6412; GFX10-NEXT: s_waitcnt vmcnt(2) 6413; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 6414; GFX10-NEXT: s_waitcnt vmcnt(1) 6415; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 6416; GFX10-NEXT: v_writelane_b32 v40, s31, 1 6417; GFX10-NEXT: v_writelane_b32 v40, s34, 2 6418; GFX10-NEXT: v_writelane_b32 v40, s35, 3 6419; GFX10-NEXT: v_writelane_b32 v40, s36, 4 6420; GFX10-NEXT: v_writelane_b32 v40, s37, 5 6421; GFX10-NEXT: v_writelane_b32 v40, s38, 6 6422; GFX10-NEXT: v_writelane_b32 v40, s39, 7 6423; GFX10-NEXT: v_writelane_b32 v40, s40, 8 6424; GFX10-NEXT: v_writelane_b32 v40, s41, 9 6425; GFX10-NEXT: v_writelane_b32 v40, s42, 10 6426; GFX10-NEXT: v_writelane_b32 v40, s43, 11 6427; GFX10-NEXT: v_writelane_b32 v40, s44, 12 6428; GFX10-NEXT: v_writelane_b32 v40, s45, 13 6429; GFX10-NEXT: v_writelane_b32 v40, s46, 14 6430; GFX10-NEXT: v_writelane_b32 v40, s47, 15 6431; GFX10-NEXT: v_writelane_b32 v40, s48, 16 6432; GFX10-NEXT: v_writelane_b32 v40, s49, 17 6433; GFX10-NEXT: v_writelane_b32 v40, s50, 18 6434; GFX10-NEXT: v_writelane_b32 v40, s51, 19 6435; GFX10-NEXT: v_writelane_b32 v40, s52, 20 6436; GFX10-NEXT: v_writelane_b32 v40, s53, 21 6437; GFX10-NEXT: v_writelane_b32 v40, s54, 22 6438; GFX10-NEXT: v_writelane_b32 v40, s55, 23 6439; GFX10-NEXT: v_writelane_b32 v40, s56, 24 6440; GFX10-NEXT: v_writelane_b32 v40, s57, 25 6441; GFX10-NEXT: v_writelane_b32 v40, s58, 26 6442; GFX10-NEXT: v_writelane_b32 v40, s59, 27 6443; GFX10-NEXT: v_writelane_b32 v40, s60, 28 6444; GFX10-NEXT: v_writelane_b32 v40, s61, 29 6445; GFX10-NEXT: v_writelane_b32 v40, s62, 30 6446; GFX10-NEXT: v_writelane_b32 v40, s63, 31 6447; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] 6448; GFX10-NEXT: v_readlane_b32 s63, v40, 31 6449; 
GFX10-NEXT: v_readlane_b32 s62, v40, 30 6450; GFX10-NEXT: v_readlane_b32 s61, v40, 29 6451; GFX10-NEXT: v_readlane_b32 s60, v40, 28 6452; GFX10-NEXT: v_readlane_b32 s59, v40, 27 6453; GFX10-NEXT: v_readlane_b32 s58, v40, 26 6454; GFX10-NEXT: v_readlane_b32 s57, v40, 25 6455; GFX10-NEXT: v_readlane_b32 s56, v40, 24 6456; GFX10-NEXT: v_readlane_b32 s55, v40, 23 6457; GFX10-NEXT: v_readlane_b32 s54, v40, 22 6458; GFX10-NEXT: v_readlane_b32 s53, v40, 21 6459; GFX10-NEXT: v_readlane_b32 s52, v40, 20 6460; GFX10-NEXT: v_readlane_b32 s51, v40, 19 6461; GFX10-NEXT: v_readlane_b32 s50, v40, 18 6462; GFX10-NEXT: v_readlane_b32 s49, v40, 17 6463; GFX10-NEXT: v_readlane_b32 s48, v40, 16 6464; GFX10-NEXT: v_readlane_b32 s47, v40, 15 6465; GFX10-NEXT: v_readlane_b32 s46, v40, 14 6466; GFX10-NEXT: v_readlane_b32 s45, v40, 13 6467; GFX10-NEXT: v_readlane_b32 s44, v40, 12 6468; GFX10-NEXT: v_readlane_b32 s43, v40, 11 6469; GFX10-NEXT: v_readlane_b32 s42, v40, 10 6470; GFX10-NEXT: v_readlane_b32 s41, v40, 9 6471; GFX10-NEXT: v_readlane_b32 s40, v40, 8 6472; GFX10-NEXT: v_readlane_b32 s39, v40, 7 6473; GFX10-NEXT: v_readlane_b32 s38, v40, 6 6474; GFX10-NEXT: v_readlane_b32 s37, v40, 5 6475; GFX10-NEXT: v_readlane_b32 s36, v40, 4 6476; GFX10-NEXT: v_readlane_b32 s35, v40, 3 6477; GFX10-NEXT: v_readlane_b32 s34, v40, 2 6478; GFX10-NEXT: v_readlane_b32 s31, v40, 1 6479; GFX10-NEXT: v_readlane_b32 s30, v40, 0 6480; GFX10-NEXT: s_addk_i32 s32, 0xfc00 6481; GFX10-NEXT: s_mov_b32 s33, s6 6482; GFX10-NEXT: s_or_saveexec_b32 s4, -1 6483; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 6484; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6485; GFX10-NEXT: s_mov_b32 exec_lo, s4 6486; GFX10-NEXT: s_waitcnt vmcnt(0) 6487; GFX10-NEXT: s_setpc_b64 s[30:31] 6488; 6489; GFX11-LABEL: tail_call_byval_align16: 6490; GFX11: ; %bb.0: ; %entry 6491; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6492; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6493; GFX11-NEXT: s_or_saveexec_b32 
s0, -1 6494; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:24 ; 4-byte Folded Spill 6495; GFX11-NEXT: s_mov_b32 exec_lo, s0 6496; GFX11-NEXT: s_mov_b32 s4, s33 6497; GFX11-NEXT: s_mov_b32 s33, s32 6498; GFX11-NEXT: s_clause 0x1 6499; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 offset:16 6500; GFX11-NEXT: scratch_load_b32 v31, off, s33 6501; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6502; GFX11-NEXT: s_add_i32 s32, s32, 32 6503; GFX11-NEXT: s_getpc_b64 s[0:1] 6504; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 6505; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 6506; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6507; GFX11-NEXT: v_writelane_b32 v40, s34, 2 6508; GFX11-NEXT: v_writelane_b32 v40, s35, 3 6509; GFX11-NEXT: v_writelane_b32 v40, s36, 4 6510; GFX11-NEXT: v_writelane_b32 v40, s37, 5 6511; GFX11-NEXT: v_writelane_b32 v40, s38, 6 6512; GFX11-NEXT: v_writelane_b32 v40, s39, 7 6513; GFX11-NEXT: v_writelane_b32 v40, s40, 8 6514; GFX11-NEXT: v_writelane_b32 v40, s41, 9 6515; GFX11-NEXT: v_writelane_b32 v40, s42, 10 6516; GFX11-NEXT: v_writelane_b32 v40, s43, 11 6517; GFX11-NEXT: v_writelane_b32 v40, s44, 12 6518; GFX11-NEXT: v_writelane_b32 v40, s45, 13 6519; GFX11-NEXT: v_writelane_b32 v40, s46, 14 6520; GFX11-NEXT: v_writelane_b32 v40, s47, 15 6521; GFX11-NEXT: v_writelane_b32 v40, s48, 16 6522; GFX11-NEXT: v_writelane_b32 v40, s49, 17 6523; GFX11-NEXT: v_writelane_b32 v40, s50, 18 6524; GFX11-NEXT: v_writelane_b32 v40, s51, 19 6525; GFX11-NEXT: v_writelane_b32 v40, s52, 20 6526; GFX11-NEXT: v_writelane_b32 v40, s53, 21 6527; GFX11-NEXT: v_writelane_b32 v40, s54, 22 6528; GFX11-NEXT: v_writelane_b32 v40, s55, 23 6529; GFX11-NEXT: v_writelane_b32 v40, s56, 24 6530; GFX11-NEXT: v_writelane_b32 v40, s57, 25 6531; GFX11-NEXT: v_writelane_b32 v40, s58, 26 6532; GFX11-NEXT: v_writelane_b32 v40, s59, 27 6533; GFX11-NEXT: v_writelane_b32 v40, s60, 28 6534; GFX11-NEXT: v_writelane_b32 v40, s61, 29 6535; GFX11-NEXT: v_writelane_b32 
v40, s62, 30 6536; GFX11-NEXT: v_writelane_b32 v40, s63, 31 6537; GFX11-NEXT: s_waitcnt vmcnt(1) 6538; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 6539; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6540; GFX11-NEXT: v_readlane_b32 s63, v40, 31 6541; GFX11-NEXT: v_readlane_b32 s62, v40, 30 6542; GFX11-NEXT: v_readlane_b32 s61, v40, 29 6543; GFX11-NEXT: v_readlane_b32 s60, v40, 28 6544; GFX11-NEXT: v_readlane_b32 s59, v40, 27 6545; GFX11-NEXT: v_readlane_b32 s58, v40, 26 6546; GFX11-NEXT: v_readlane_b32 s57, v40, 25 6547; GFX11-NEXT: v_readlane_b32 s56, v40, 24 6548; GFX11-NEXT: v_readlane_b32 s55, v40, 23 6549; GFX11-NEXT: v_readlane_b32 s54, v40, 22 6550; GFX11-NEXT: v_readlane_b32 s53, v40, 21 6551; GFX11-NEXT: v_readlane_b32 s52, v40, 20 6552; GFX11-NEXT: v_readlane_b32 s51, v40, 19 6553; GFX11-NEXT: v_readlane_b32 s50, v40, 18 6554; GFX11-NEXT: v_readlane_b32 s49, v40, 17 6555; GFX11-NEXT: v_readlane_b32 s48, v40, 16 6556; GFX11-NEXT: v_readlane_b32 s47, v40, 15 6557; GFX11-NEXT: v_readlane_b32 s46, v40, 14 6558; GFX11-NEXT: v_readlane_b32 s45, v40, 13 6559; GFX11-NEXT: v_readlane_b32 s44, v40, 12 6560; GFX11-NEXT: v_readlane_b32 s43, v40, 11 6561; GFX11-NEXT: v_readlane_b32 s42, v40, 10 6562; GFX11-NEXT: v_readlane_b32 s41, v40, 9 6563; GFX11-NEXT: v_readlane_b32 s40, v40, 8 6564; GFX11-NEXT: v_readlane_b32 s39, v40, 7 6565; GFX11-NEXT: v_readlane_b32 s38, v40, 6 6566; GFX11-NEXT: v_readlane_b32 s37, v40, 5 6567; GFX11-NEXT: v_readlane_b32 s36, v40, 4 6568; GFX11-NEXT: v_readlane_b32 s35, v40, 3 6569; GFX11-NEXT: v_readlane_b32 s34, v40, 2 6570; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6571; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6572; GFX11-NEXT: s_addk_i32 s32, 0xffe0 6573; GFX11-NEXT: s_mov_b32 s33, s4 6574; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6575; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:24 ; 4-byte Folded Reload 6576; GFX11-NEXT: s_mov_b32 exec_lo, s0 6577; GFX11-NEXT: s_waitcnt vmcnt(0) 6578; GFX11-NEXT: s_setpc_b64 s[30:31] 6579; 6580; 
GFX10-SCRATCH-LABEL: tail_call_byval_align16: 6581; GFX10-SCRATCH: ; %bb.0: ; %entry 6582; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6583; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6584; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6585; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill 6586; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6587; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6588; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 6589; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 6590; GFX10-SCRATCH-NEXT: s_clause 0x1 6591; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16 6592; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 6593; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 6594; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 6595; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 6596; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 6597; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 6598; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 6599; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 6600; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 6601; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 6602; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 6603; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 6604; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 6605; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8 6606; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9 6607; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10 6608; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11 6609; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12 6610; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13 6611; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14 6612; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15 6613; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16 6614; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17 6615; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18 6616; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19 6617; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20 6618; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21 6619; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22 6620; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23 6621; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24 6622; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25 6623; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26 6624; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27 6625; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28 6626; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29 6627; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30 6628; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31 6629; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) 6630; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 6631; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 6632; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31 6633; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30 6634; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29 6635; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28 6636; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27 6637; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26 6638; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25 6639; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24 6640; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23 6641; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22 6642; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21 6643; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20 6644; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19 6645; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18 6646; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17 6647; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16 6648; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15 6649; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14 6650; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13 6651; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12 6652; GFX10-SCRATCH-NEXT: v_readlane_b32 
s43, v40, 11 6653; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10 6654; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9 6655; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8 6656; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 6657; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 6658; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 6659; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 6660; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 6661; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 6662; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 6663; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 6664; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 6665; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 6666; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6667; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload 6668; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6669; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6670; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6671; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 6672entry: 6673 %alloca = alloca double, align 8, addrspace(5) 6674 tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) 6675 ret void 6676} 6677 6678; inreg arguments are put in SGPRs, except the i1 case directly below, whose checks show the value being stored as a byte to the stack at s32 6679define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { 6680; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg: 6681; GFX9: ; %bb.0: 6682; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6683; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6684; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6685; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6686; GFX9-NEXT: v_writelane_b32 v40, s33, 2 6687; GFX9-NEXT: s_mov_b32 s33, s32 6688; GFX9-NEXT: s_addk_i32 s32, 0x400 6689; GFX9-NEXT: v_writelane_b32 v40, s30, 0 6690; GFX9-NEXT: v_mov_b32_e32 v0, 1 6691; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6692; GFX9-NEXT: s_getpc_b64 s[34:35] 6693; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i1_inreg@rel32@lo+4 6694; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 6695; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 6696; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 6697; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6698; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6699; GFX9-NEXT: s_addk_i32 s32, 0xfc00 6700; GFX9-NEXT: v_readlane_b32 s33, v40, 2 6701; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6702; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6703; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6704; GFX9-NEXT: s_waitcnt vmcnt(0) 6705; GFX9-NEXT: s_setpc_b64 s[30:31] 6706; 6707; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg: 6708; GFX10: ; %bb.0: 6709; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6710; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6711; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6712; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6713; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6714; GFX10-NEXT: s_mov_b32 exec_lo, s34 6715; GFX10-NEXT: v_writelane_b32 v40, s33, 2 6716; GFX10-NEXT: v_mov_b32_e32 v0, 1 6717; GFX10-NEXT: s_mov_b32 s33, s32 6718; GFX10-NEXT: s_addk_i32 s32, 0x200 6719; GFX10-NEXT: s_getpc_b64 s[34:35] 6720; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 6721; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 6722; GFX10-NEXT: v_writelane_b32 v40, s30, 0 6723; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 6724; GFX10-NEXT: v_writelane_b32 v40, s31, 1 6725; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 6726; GFX10-NEXT: v_readlane_b32 s31, v40, 1 6727; GFX10-NEXT: v_readlane_b32 s30, v40, 0 6728; GFX10-NEXT: s_addk_i32 s32, 0xfe00 6729; GFX10-NEXT: v_readlane_b32 s33, v40, 2 6730; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6731; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6732; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6733; GFX10-NEXT: s_mov_b32 exec_lo, s34 6734; GFX10-NEXT: 
s_waitcnt vmcnt(0) 6735; GFX10-NEXT: s_setpc_b64 s[30:31] 6736; 6737; GFX11-LABEL: test_call_external_void_func_i1_imm_inreg: 6738; GFX11: ; %bb.0: 6739; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6740; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6741; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6742; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 6743; GFX11-NEXT: s_mov_b32 exec_lo, s0 6744; GFX11-NEXT: v_writelane_b32 v40, s33, 2 6745; GFX11-NEXT: v_mov_b32_e32 v0, 1 6746; GFX11-NEXT: s_mov_b32 s33, s32 6747; GFX11-NEXT: s_add_i32 s32, s32, 16 6748; GFX11-NEXT: s_getpc_b64 s[0:1] 6749; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4 6750; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12 6751; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6752; GFX11-NEXT: scratch_store_b8 off, v0, s32 6753; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6754; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6755; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6756; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6757; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6758; GFX11-NEXT: s_add_i32 s32, s32, -16 6759; GFX11-NEXT: v_readlane_b32 s33, v40, 2 6760; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6761; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 6762; GFX11-NEXT: s_mov_b32 exec_lo, s0 6763; GFX11-NEXT: s_waitcnt vmcnt(0) 6764; GFX11-NEXT: s_setpc_b64 s[30:31] 6765; 6766; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg: 6767; GFX10-SCRATCH: ; %bb.0: 6768; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6769; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6770; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6771; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 6772; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6773; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6774; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 6775; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 6776; GFX10-SCRATCH-NEXT: s_mov_b32 s33, 
s32 6777; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 6778; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 6779; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4 6780; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12 6781; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 6782; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 6783; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 6784; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 6785; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 6786; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 6787; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 6788; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 6789; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6790; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 6791; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6792; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6793; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6794; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 6795 call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true) 6796 ret void 6797} 6798 6799define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { 6800; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg: 6801; GFX9: ; %bb.0: 6802; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6803; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6804; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6805; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6806; GFX9-NEXT: v_writelane_b32 v40, s33, 3 6807; GFX9-NEXT: v_writelane_b32 v40, s4, 0 6808; GFX9-NEXT: s_mov_b32 s33, s32 6809; GFX9-NEXT: s_addk_i32 s32, 0x400 6810; GFX9-NEXT: v_writelane_b32 v40, s30, 1 6811; GFX9-NEXT: s_movk_i32 s4, 0x7b 6812; GFX9-NEXT: v_writelane_b32 v40, s31, 2 6813; GFX9-NEXT: s_getpc_b64 s[34:35] 6814; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 6815; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 6816; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 6817; GFX9-NEXT: v_readlane_b32 s31, v40, 2 6818; GFX9-NEXT: v_readlane_b32 s30, v40, 1 6819; GFX9-NEXT: v_readlane_b32 s4, v40, 0 6820; GFX9-NEXT: s_addk_i32 s32, 0xfc00 6821; GFX9-NEXT: v_readlane_b32 s33, v40, 3 6822; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6823; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6824; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6825; GFX9-NEXT: s_waitcnt vmcnt(0) 6826; GFX9-NEXT: s_setpc_b64 s[30:31] 6827; 6828; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg: 6829; GFX10: ; %bb.0: 6830; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6831; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6832; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6833; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6834; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6835; GFX10-NEXT: s_mov_b32 exec_lo, s34 6836; GFX10-NEXT: v_writelane_b32 v40, s33, 3 6837; GFX10-NEXT: s_mov_b32 s33, s32 6838; GFX10-NEXT: s_addk_i32 s32, 0x200 6839; GFX10-NEXT: s_getpc_b64 s[34:35] 6840; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 6841; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 6842; GFX10-NEXT: v_writelane_b32 v40, s4, 0 6843; GFX10-NEXT: s_movk_i32 s4, 0x7b 6844; GFX10-NEXT: v_writelane_b32 v40, s30, 1 6845; GFX10-NEXT: v_writelane_b32 v40, s31, 2 6846; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 6847; GFX10-NEXT: v_readlane_b32 s31, v40, 2 6848; GFX10-NEXT: v_readlane_b32 s30, v40, 1 6849; GFX10-NEXT: v_readlane_b32 s4, v40, 0 6850; GFX10-NEXT: s_addk_i32 s32, 0xfe00 6851; GFX10-NEXT: v_readlane_b32 s33, v40, 3 6852; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6853; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6854; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6855; GFX10-NEXT: s_mov_b32 exec_lo, s34 6856; GFX10-NEXT: s_waitcnt vmcnt(0) 6857; GFX10-NEXT: s_setpc_b64 s[30:31] 6858; 6859; GFX11-LABEL: 
test_call_external_void_func_i8_imm_inreg: 6860; GFX11: ; %bb.0: 6861; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6862; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6863; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6864; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 6865; GFX11-NEXT: s_mov_b32 exec_lo, s0 6866; GFX11-NEXT: v_writelane_b32 v40, s33, 3 6867; GFX11-NEXT: s_mov_b32 s33, s32 6868; GFX11-NEXT: s_add_i32 s32, s32, 16 6869; GFX11-NEXT: s_getpc_b64 s[0:1] 6870; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 6871; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 6872; GFX11-NEXT: v_writelane_b32 v40, s4, 0 6873; GFX11-NEXT: s_movk_i32 s4, 0x7b 6874; GFX11-NEXT: v_writelane_b32 v40, s30, 1 6875; GFX11-NEXT: v_writelane_b32 v40, s31, 2 6876; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6877; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6878; GFX11-NEXT: v_readlane_b32 s31, v40, 2 6879; GFX11-NEXT: v_readlane_b32 s30, v40, 1 6880; GFX11-NEXT: v_readlane_b32 s4, v40, 0 6881; GFX11-NEXT: s_add_i32 s32, s32, -16 6882; GFX11-NEXT: v_readlane_b32 s33, v40, 3 6883; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6884; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 6885; GFX11-NEXT: s_mov_b32 exec_lo, s0 6886; GFX11-NEXT: s_waitcnt vmcnt(0) 6887; GFX11-NEXT: s_setpc_b64 s[30:31] 6888; 6889; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg: 6890; GFX10-SCRATCH: ; %bb.0: 6891; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6892; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 6893; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6894; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 6895; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6896; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6897; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 6898; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 6899; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 6900; GFX10-SCRATCH-NEXT: 
s_getpc_b64 s[0:1] 6901; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 6902; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 6903; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 6904; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b 6905; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 6906; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 6907; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 6908; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 6909; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 6910; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 6911; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 6912; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 6913; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 6914; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 6915; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 6916; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 6917; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 6918; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 6919 call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123) 6920 ret void 6921} 6922 6923define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { 6924; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg: 6925; GFX9: ; %bb.0: 6926; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6927; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6928; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6929; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6930; GFX9-NEXT: v_writelane_b32 v40, s33, 3 6931; GFX9-NEXT: v_writelane_b32 v40, s4, 0 6932; GFX9-NEXT: s_mov_b32 s33, s32 6933; GFX9-NEXT: s_addk_i32 s32, 0x400 6934; GFX9-NEXT: v_writelane_b32 v40, s30, 1 6935; GFX9-NEXT: s_movk_i32 s4, 0x7b 6936; GFX9-NEXT: v_writelane_b32 v40, s31, 2 6937; GFX9-NEXT: s_getpc_b64 s[34:35] 6938; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 6939; GFX9-NEXT: s_addc_u32 s35, s35, 
external_void_func_i16_inreg@rel32@hi+12 6940; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 6941; GFX9-NEXT: v_readlane_b32 s31, v40, 2 6942; GFX9-NEXT: v_readlane_b32 s30, v40, 1 6943; GFX9-NEXT: v_readlane_b32 s4, v40, 0 6944; GFX9-NEXT: s_addk_i32 s32, 0xfc00 6945; GFX9-NEXT: v_readlane_b32 s33, v40, 3 6946; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 6947; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6948; GFX9-NEXT: s_mov_b64 exec, s[34:35] 6949; GFX9-NEXT: s_waitcnt vmcnt(0) 6950; GFX9-NEXT: s_setpc_b64 s[30:31] 6951; 6952; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg: 6953; GFX10: ; %bb.0: 6954; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6955; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6956; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6957; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 6958; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6959; GFX10-NEXT: s_mov_b32 exec_lo, s34 6960; GFX10-NEXT: v_writelane_b32 v40, s33, 3 6961; GFX10-NEXT: s_mov_b32 s33, s32 6962; GFX10-NEXT: s_addk_i32 s32, 0x200 6963; GFX10-NEXT: s_getpc_b64 s[34:35] 6964; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 6965; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 6966; GFX10-NEXT: v_writelane_b32 v40, s4, 0 6967; GFX10-NEXT: s_movk_i32 s4, 0x7b 6968; GFX10-NEXT: v_writelane_b32 v40, s30, 1 6969; GFX10-NEXT: v_writelane_b32 v40, s31, 2 6970; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 6971; GFX10-NEXT: v_readlane_b32 s31, v40, 2 6972; GFX10-NEXT: v_readlane_b32 s30, v40, 1 6973; GFX10-NEXT: v_readlane_b32 s4, v40, 0 6974; GFX10-NEXT: s_addk_i32 s32, 0xfe00 6975; GFX10-NEXT: v_readlane_b32 s33, v40, 3 6976; GFX10-NEXT: s_or_saveexec_b32 s34, -1 6977; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 6978; GFX10-NEXT: s_waitcnt_depctr 0xffe3 6979; GFX10-NEXT: s_mov_b32 exec_lo, s34 6980; GFX10-NEXT: s_waitcnt vmcnt(0) 6981; GFX10-NEXT: s_setpc_b64 s[30:31] 
6982; 6983; GFX11-LABEL: test_call_external_void_func_i16_imm_inreg: 6984; GFX11: ; %bb.0: 6985; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6986; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6987; GFX11-NEXT: s_or_saveexec_b32 s0, -1 6988; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 6989; GFX11-NEXT: s_mov_b32 exec_lo, s0 6990; GFX11-NEXT: v_writelane_b32 v40, s33, 3 6991; GFX11-NEXT: s_mov_b32 s33, s32 6992; GFX11-NEXT: s_add_i32 s32, s32, 16 6993; GFX11-NEXT: s_getpc_b64 s[0:1] 6994; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 6995; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 6996; GFX11-NEXT: v_writelane_b32 v40, s4, 0 6997; GFX11-NEXT: s_movk_i32 s4, 0x7b 6998; GFX11-NEXT: v_writelane_b32 v40, s30, 1 6999; GFX11-NEXT: v_writelane_b32 v40, s31, 2 7000; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7001; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7002; GFX11-NEXT: v_readlane_b32 s31, v40, 2 7003; GFX11-NEXT: v_readlane_b32 s30, v40, 1 7004; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7005; GFX11-NEXT: s_add_i32 s32, s32, -16 7006; GFX11-NEXT: v_readlane_b32 s33, v40, 3 7007; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7008; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7009; GFX11-NEXT: s_mov_b32 exec_lo, s0 7010; GFX11-NEXT: s_waitcnt vmcnt(0) 7011; GFX11-NEXT: s_setpc_b64 s[30:31] 7012; 7013; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg: 7014; GFX10-SCRATCH: ; %bb.0: 7015; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7016; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7017; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7018; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7019; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7020; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7021; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 7022; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7023; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7024; 
GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7025; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 7026; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 7027; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7028; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b 7029; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 7030; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 7031; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7032; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 7033; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 7034; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7035; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7036; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 7037; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7038; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7039; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7040; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7041; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7042; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7043 call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123) 7044 ret void 7045} 7046 7047define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { 7048; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg: 7049; GFX9: ; %bb.0: 7050; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7051; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7052; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7053; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7054; GFX9-NEXT: v_writelane_b32 v40, s33, 3 7055; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7056; GFX9-NEXT: s_mov_b32 s33, s32 7057; GFX9-NEXT: s_addk_i32 s32, 0x400 7058; GFX9-NEXT: v_writelane_b32 v40, s30, 1 7059; GFX9-NEXT: s_mov_b32 s4, 42 7060; GFX9-NEXT: v_writelane_b32 v40, s31, 2 7061; GFX9-NEXT: s_getpc_b64 s[34:35] 7062; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 7063; GFX9-NEXT: s_addc_u32 s35, s35, 
external_void_func_i32_inreg@rel32@hi+12 7064; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7065; GFX9-NEXT: v_readlane_b32 s31, v40, 2 7066; GFX9-NEXT: v_readlane_b32 s30, v40, 1 7067; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7068; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7069; GFX9-NEXT: v_readlane_b32 s33, v40, 3 7070; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7071; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7072; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7073; GFX9-NEXT: s_waitcnt vmcnt(0) 7074; GFX9-NEXT: s_setpc_b64 s[30:31] 7075; 7076; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg: 7077; GFX10: ; %bb.0: 7078; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7079; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7080; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7081; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7082; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7083; GFX10-NEXT: s_mov_b32 exec_lo, s34 7084; GFX10-NEXT: v_writelane_b32 v40, s33, 3 7085; GFX10-NEXT: s_mov_b32 s33, s32 7086; GFX10-NEXT: s_addk_i32 s32, 0x200 7087; GFX10-NEXT: s_getpc_b64 s[34:35] 7088; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 7089; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 7090; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7091; GFX10-NEXT: s_mov_b32 s4, 42 7092; GFX10-NEXT: v_writelane_b32 v40, s30, 1 7093; GFX10-NEXT: v_writelane_b32 v40, s31, 2 7094; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7095; GFX10-NEXT: v_readlane_b32 s31, v40, 2 7096; GFX10-NEXT: v_readlane_b32 s30, v40, 1 7097; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7098; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7099; GFX10-NEXT: v_readlane_b32 s33, v40, 3 7100; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7101; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7102; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7103; GFX10-NEXT: s_mov_b32 exec_lo, s34 7104; GFX10-NEXT: s_waitcnt vmcnt(0) 7105; GFX10-NEXT: s_setpc_b64 s[30:31] 
7106; 7107; GFX11-LABEL: test_call_external_void_func_i32_imm_inreg: 7108; GFX11: ; %bb.0: 7109; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7110; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7111; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7112; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7113; GFX11-NEXT: s_mov_b32 exec_lo, s0 7114; GFX11-NEXT: v_writelane_b32 v40, s33, 3 7115; GFX11-NEXT: s_mov_b32 s33, s32 7116; GFX11-NEXT: s_add_i32 s32, s32, 16 7117; GFX11-NEXT: s_getpc_b64 s[0:1] 7118; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4 7119; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12 7120; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7121; GFX11-NEXT: s_mov_b32 s4, 42 7122; GFX11-NEXT: v_writelane_b32 v40, s30, 1 7123; GFX11-NEXT: v_writelane_b32 v40, s31, 2 7124; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7125; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7126; GFX11-NEXT: v_readlane_b32 s31, v40, 2 7127; GFX11-NEXT: v_readlane_b32 s30, v40, 1 7128; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7129; GFX11-NEXT: s_add_i32 s32, s32, -16 7130; GFX11-NEXT: v_readlane_b32 s33, v40, 3 7131; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7132; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7133; GFX11-NEXT: s_mov_b32 exec_lo, s0 7134; GFX11-NEXT: s_waitcnt vmcnt(0) 7135; GFX11-NEXT: s_setpc_b64 s[30:31] 7136; 7137; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg: 7138; GFX10-SCRATCH: ; %bb.0: 7139; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7140; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7141; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7142; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7143; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7144; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7145; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 7146; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7147; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7148; 
GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7149; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4 7150; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12 7151; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7152; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 7153; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 7154; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 7155; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7156; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 7157; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 7158; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7159; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7160; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 7161; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7162; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7163; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7164; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7165; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7166; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7167 call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42) 7168 ret void 7169} 7170 7171define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { 7172; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg: 7173; GFX9: ; %bb.0: 7174; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7175; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7176; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7177; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7178; GFX9-NEXT: v_writelane_b32 v40, s33, 4 7179; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7180; GFX9-NEXT: v_writelane_b32 v40, s5, 1 7181; GFX9-NEXT: s_mov_b32 s33, s32 7182; GFX9-NEXT: s_addk_i32 s32, 0x400 7183; GFX9-NEXT: v_writelane_b32 v40, s30, 2 7184; GFX9-NEXT: s_movk_i32 s4, 0x7b 7185; GFX9-NEXT: s_mov_b32 s5, 0 7186; GFX9-NEXT: v_writelane_b32 v40, s31, 3 7187; GFX9-NEXT: s_getpc_b64 s[34:35] 7188; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i64_inreg@rel32@lo+4 7189; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 7190; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7191; GFX9-NEXT: v_readlane_b32 s31, v40, 3 7192; GFX9-NEXT: v_readlane_b32 s30, v40, 2 7193; GFX9-NEXT: v_readlane_b32 s5, v40, 1 7194; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7195; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7196; GFX9-NEXT: v_readlane_b32 s33, v40, 4 7197; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7198; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7199; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7200; GFX9-NEXT: s_waitcnt vmcnt(0) 7201; GFX9-NEXT: s_setpc_b64 s[30:31] 7202; 7203; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg: 7204; GFX10: ; %bb.0: 7205; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7206; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7207; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7208; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7209; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7210; GFX10-NEXT: s_mov_b32 exec_lo, s34 7211; GFX10-NEXT: v_writelane_b32 v40, s33, 4 7212; GFX10-NEXT: s_mov_b32 s33, s32 7213; GFX10-NEXT: s_addk_i32 s32, 0x200 7214; GFX10-NEXT: s_getpc_b64 s[34:35] 7215; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 7216; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 7217; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7218; GFX10-NEXT: s_movk_i32 s4, 0x7b 7219; GFX10-NEXT: v_writelane_b32 v40, s5, 1 7220; GFX10-NEXT: s_mov_b32 s5, 0 7221; GFX10-NEXT: v_writelane_b32 v40, s30, 2 7222; GFX10-NEXT: v_writelane_b32 v40, s31, 3 7223; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7224; GFX10-NEXT: v_readlane_b32 s31, v40, 3 7225; GFX10-NEXT: v_readlane_b32 s30, v40, 2 7226; GFX10-NEXT: v_readlane_b32 s5, v40, 1 7227; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7228; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7229; GFX10-NEXT: v_readlane_b32 s33, v40, 4 7230; GFX10-NEXT: s_or_saveexec_b32 s34, 
-1 7231; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7232; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7233; GFX10-NEXT: s_mov_b32 exec_lo, s34 7234; GFX10-NEXT: s_waitcnt vmcnt(0) 7235; GFX10-NEXT: s_setpc_b64 s[30:31] 7236; 7237; GFX11-LABEL: test_call_external_void_func_i64_imm_inreg: 7238; GFX11: ; %bb.0: 7239; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7240; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7241; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7242; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7243; GFX11-NEXT: s_mov_b32 exec_lo, s0 7244; GFX11-NEXT: v_writelane_b32 v40, s33, 4 7245; GFX11-NEXT: s_mov_b32 s33, s32 7246; GFX11-NEXT: s_add_i32 s32, s32, 16 7247; GFX11-NEXT: s_getpc_b64 s[0:1] 7248; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4 7249; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12 7250; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7251; GFX11-NEXT: s_movk_i32 s4, 0x7b 7252; GFX11-NEXT: v_writelane_b32 v40, s5, 1 7253; GFX11-NEXT: s_mov_b32 s5, 0 7254; GFX11-NEXT: v_writelane_b32 v40, s30, 2 7255; GFX11-NEXT: v_writelane_b32 v40, s31, 3 7256; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7257; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7258; GFX11-NEXT: v_readlane_b32 s31, v40, 3 7259; GFX11-NEXT: v_readlane_b32 s30, v40, 2 7260; GFX11-NEXT: v_readlane_b32 s5, v40, 1 7261; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7262; GFX11-NEXT: s_add_i32 s32, s32, -16 7263; GFX11-NEXT: v_readlane_b32 s33, v40, 4 7264; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7265; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7266; GFX11-NEXT: s_mov_b32 exec_lo, s0 7267; GFX11-NEXT: s_waitcnt vmcnt(0) 7268; GFX11-NEXT: s_setpc_b64 s[30:31] 7269; 7270; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg: 7271; GFX10-SCRATCH: ; %bb.0: 7272; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7273; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7274; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7275; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7276; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7277; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7278; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 7279; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7280; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7281; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7282; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4 7283; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12 7284; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7285; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b 7286; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 7287; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 7288; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 7289; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 7290; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7291; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 7292; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 7293; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 7294; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7295; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7296; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 7297; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7298; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7299; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7300; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7301; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7302; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7303 call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123) 7304 ret void 7305} 7306 7307define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { 7308; GFX9-LABEL: test_call_external_void_func_v2i64_inreg: 7309; GFX9: ; %bb.0: 7310; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7311; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7312; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 
4-byte Folded Spill 7313; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7314; GFX9-NEXT: v_writelane_b32 v40, s33, 6 7315; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7316; GFX9-NEXT: v_writelane_b32 v40, s5, 1 7317; GFX9-NEXT: v_writelane_b32 v40, s6, 2 7318; GFX9-NEXT: s_mov_b64 s[34:35], 0 7319; GFX9-NEXT: v_writelane_b32 v40, s7, 3 7320; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 7321; GFX9-NEXT: s_mov_b32 s33, s32 7322; GFX9-NEXT: s_addk_i32 s32, 0x400 7323; GFX9-NEXT: v_writelane_b32 v40, s30, 4 7324; GFX9-NEXT: v_writelane_b32 v40, s31, 5 7325; GFX9-NEXT: s_getpc_b64 s[34:35] 7326; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 7327; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 7328; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7329; GFX9-NEXT: v_readlane_b32 s31, v40, 5 7330; GFX9-NEXT: v_readlane_b32 s30, v40, 4 7331; GFX9-NEXT: v_readlane_b32 s7, v40, 3 7332; GFX9-NEXT: v_readlane_b32 s6, v40, 2 7333; GFX9-NEXT: v_readlane_b32 s5, v40, 1 7334; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7335; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7336; GFX9-NEXT: v_readlane_b32 s33, v40, 6 7337; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7338; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7339; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7340; GFX9-NEXT: s_waitcnt vmcnt(0) 7341; GFX9-NEXT: s_setpc_b64 s[30:31] 7342; 7343; GFX10-LABEL: test_call_external_void_func_v2i64_inreg: 7344; GFX10: ; %bb.0: 7345; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7346; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7347; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7348; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7349; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7350; GFX10-NEXT: s_mov_b32 exec_lo, s34 7351; GFX10-NEXT: v_writelane_b32 v40, s33, 6 7352; GFX10-NEXT: s_mov_b64 s[34:35], 0 7353; GFX10-NEXT: s_mov_b32 s33, s32 7354; GFX10-NEXT: s_addk_i32 s32, 0x200 7355; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7356; 
GFX10-NEXT: v_writelane_b32 v40, s5, 1 7357; GFX10-NEXT: v_writelane_b32 v40, s6, 2 7358; GFX10-NEXT: v_writelane_b32 v40, s7, 3 7359; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 7360; GFX10-NEXT: s_getpc_b64 s[34:35] 7361; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 7362; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 7363; GFX10-NEXT: v_writelane_b32 v40, s30, 4 7364; GFX10-NEXT: v_writelane_b32 v40, s31, 5 7365; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7366; GFX10-NEXT: v_readlane_b32 s31, v40, 5 7367; GFX10-NEXT: v_readlane_b32 s30, v40, 4 7368; GFX10-NEXT: v_readlane_b32 s7, v40, 3 7369; GFX10-NEXT: v_readlane_b32 s6, v40, 2 7370; GFX10-NEXT: v_readlane_b32 s5, v40, 1 7371; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7372; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7373; GFX10-NEXT: v_readlane_b32 s33, v40, 6 7374; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7375; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7376; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7377; GFX10-NEXT: s_mov_b32 exec_lo, s34 7378; GFX10-NEXT: s_waitcnt vmcnt(0) 7379; GFX10-NEXT: s_setpc_b64 s[30:31] 7380; 7381; GFX11-LABEL: test_call_external_void_func_v2i64_inreg: 7382; GFX11: ; %bb.0: 7383; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7384; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7385; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7386; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7387; GFX11-NEXT: s_mov_b32 exec_lo, s0 7388; GFX11-NEXT: v_writelane_b32 v40, s33, 6 7389; GFX11-NEXT: s_mov_b64 s[0:1], 0 7390; GFX11-NEXT: s_mov_b32 s33, s32 7391; GFX11-NEXT: s_add_i32 s32, s32, 16 7392; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7393; GFX11-NEXT: v_writelane_b32 v40, s5, 1 7394; GFX11-NEXT: v_writelane_b32 v40, s6, 2 7395; GFX11-NEXT: v_writelane_b32 v40, s7, 3 7396; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 7397; GFX11-NEXT: s_getpc_b64 s[0:1] 7398; GFX11-NEXT: s_add_u32 s0, s0, 
external_void_func_v2i64_inreg@rel32@lo+4 7399; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 7400; GFX11-NEXT: v_writelane_b32 v40, s30, 4 7401; GFX11-NEXT: v_writelane_b32 v40, s31, 5 7402; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7403; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7404; GFX11-NEXT: v_readlane_b32 s31, v40, 5 7405; GFX11-NEXT: v_readlane_b32 s30, v40, 4 7406; GFX11-NEXT: v_readlane_b32 s7, v40, 3 7407; GFX11-NEXT: v_readlane_b32 s6, v40, 2 7408; GFX11-NEXT: v_readlane_b32 s5, v40, 1 7409; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7410; GFX11-NEXT: s_add_i32 s32, s32, -16 7411; GFX11-NEXT: v_readlane_b32 s33, v40, 6 7412; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7413; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7414; GFX11-NEXT: s_mov_b32 exec_lo, s0 7415; GFX11-NEXT: s_waitcnt vmcnt(0) 7416; GFX11-NEXT: s_setpc_b64 s[30:31] 7417; 7418; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg: 7419; GFX10-SCRATCH: ; %bb.0: 7420; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7421; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7422; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7423; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7424; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7425; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7426; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 7427; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 7428; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7429; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7430; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7431; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 7432; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 7433; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 7434; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 7435; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7436; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 7437; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v2i64_inreg@rel32@hi+12 7438; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 7439; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 7440; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7441; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 7442; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 7443; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 7444; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 7445; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 7446; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7447; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7448; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 7449; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7450; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7451; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7452; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7453; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7454; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7455 %val = load <2 x i64>, <2 x i64> addrspace(4)* null 7456 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val) 7457 ret void 7458} 7459 7460define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { 7461; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg: 7462; GFX9: ; %bb.0: 7463; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7464; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7465; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7466; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7467; GFX9-NEXT: v_writelane_b32 v40, s33, 6 7468; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7469; GFX9-NEXT: v_writelane_b32 v40, s5, 1 7470; GFX9-NEXT: v_writelane_b32 v40, s6, 2 7471; GFX9-NEXT: v_writelane_b32 v40, s7, 3 7472; GFX9-NEXT: s_mov_b32 s33, s32 7473; GFX9-NEXT: s_addk_i32 s32, 0x400 7474; GFX9-NEXT: v_writelane_b32 v40, s30, 4 7475; GFX9-NEXT: s_mov_b32 s4, 1 7476; GFX9-NEXT: s_mov_b32 s5, 2 7477; GFX9-NEXT: s_mov_b32 s6, 3 7478; GFX9-NEXT: s_mov_b32 s7, 4 7479; GFX9-NEXT: 
v_writelane_b32 v40, s31, 5 7480; GFX9-NEXT: s_getpc_b64 s[34:35] 7481; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 7482; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 7483; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7484; GFX9-NEXT: v_readlane_b32 s31, v40, 5 7485; GFX9-NEXT: v_readlane_b32 s30, v40, 4 7486; GFX9-NEXT: v_readlane_b32 s7, v40, 3 7487; GFX9-NEXT: v_readlane_b32 s6, v40, 2 7488; GFX9-NEXT: v_readlane_b32 s5, v40, 1 7489; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7490; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7491; GFX9-NEXT: v_readlane_b32 s33, v40, 6 7492; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7493; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7494; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7495; GFX9-NEXT: s_waitcnt vmcnt(0) 7496; GFX9-NEXT: s_setpc_b64 s[30:31] 7497; 7498; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg: 7499; GFX10: ; %bb.0: 7500; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7501; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7502; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7503; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7504; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7505; GFX10-NEXT: s_mov_b32 exec_lo, s34 7506; GFX10-NEXT: v_writelane_b32 v40, s33, 6 7507; GFX10-NEXT: s_mov_b32 s33, s32 7508; GFX10-NEXT: s_addk_i32 s32, 0x200 7509; GFX10-NEXT: s_getpc_b64 s[34:35] 7510; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 7511; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 7512; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7513; GFX10-NEXT: s_mov_b32 s4, 1 7514; GFX10-NEXT: v_writelane_b32 v40, s5, 1 7515; GFX10-NEXT: s_mov_b32 s5, 2 7516; GFX10-NEXT: v_writelane_b32 v40, s6, 2 7517; GFX10-NEXT: s_mov_b32 s6, 3 7518; GFX10-NEXT: v_writelane_b32 v40, s7, 3 7519; GFX10-NEXT: s_mov_b32 s7, 4 7520; GFX10-NEXT: v_writelane_b32 v40, s30, 4 7521; GFX10-NEXT: v_writelane_b32 v40, s31, 5 
7522; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7523; GFX10-NEXT: v_readlane_b32 s31, v40, 5 7524; GFX10-NEXT: v_readlane_b32 s30, v40, 4 7525; GFX10-NEXT: v_readlane_b32 s7, v40, 3 7526; GFX10-NEXT: v_readlane_b32 s6, v40, 2 7527; GFX10-NEXT: v_readlane_b32 s5, v40, 1 7528; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7529; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7530; GFX10-NEXT: v_readlane_b32 s33, v40, 6 7531; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7532; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7533; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7534; GFX10-NEXT: s_mov_b32 exec_lo, s34 7535; GFX10-NEXT: s_waitcnt vmcnt(0) 7536; GFX10-NEXT: s_setpc_b64 s[30:31] 7537; 7538; GFX11-LABEL: test_call_external_void_func_v2i64_imm_inreg: 7539; GFX11: ; %bb.0: 7540; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7541; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7542; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7543; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7544; GFX11-NEXT: s_mov_b32 exec_lo, s0 7545; GFX11-NEXT: v_writelane_b32 v40, s33, 6 7546; GFX11-NEXT: s_mov_b32 s33, s32 7547; GFX11-NEXT: s_add_i32 s32, s32, 16 7548; GFX11-NEXT: s_getpc_b64 s[0:1] 7549; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 7550; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 7551; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7552; GFX11-NEXT: s_mov_b32 s4, 1 7553; GFX11-NEXT: v_writelane_b32 v40, s5, 1 7554; GFX11-NEXT: s_mov_b32 s5, 2 7555; GFX11-NEXT: v_writelane_b32 v40, s6, 2 7556; GFX11-NEXT: s_mov_b32 s6, 3 7557; GFX11-NEXT: v_writelane_b32 v40, s7, 3 7558; GFX11-NEXT: s_mov_b32 s7, 4 7559; GFX11-NEXT: v_writelane_b32 v40, s30, 4 7560; GFX11-NEXT: v_writelane_b32 v40, s31, 5 7561; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7562; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7563; GFX11-NEXT: v_readlane_b32 s31, v40, 5 7564; GFX11-NEXT: v_readlane_b32 s30, v40, 4 7565; GFX11-NEXT: v_readlane_b32 s7, v40, 3 
7566; GFX11-NEXT: v_readlane_b32 s6, v40, 2 7567; GFX11-NEXT: v_readlane_b32 s5, v40, 1 7568; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7569; GFX11-NEXT: s_add_i32 s32, s32, -16 7570; GFX11-NEXT: v_readlane_b32 s33, v40, 6 7571; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7572; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7573; GFX11-NEXT: s_mov_b32 exec_lo, s0 7574; GFX11-NEXT: s_waitcnt vmcnt(0) 7575; GFX11-NEXT: s_setpc_b64 s[30:31] 7576; 7577; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg: 7578; GFX10-SCRATCH: ; %bb.0: 7579; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7580; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7581; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7582; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7583; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7584; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7585; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 7586; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7587; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7588; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7589; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 7590; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 7591; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7592; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 7593; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 7594; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 7595; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 7596; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 7597; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 7598; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 7599; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 7600; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 7601; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7602; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 7603; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 7604; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 7605; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 7606; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 7607; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7608; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7609; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 7610; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7611; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7612; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7613; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7614; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7615; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7616 call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>) 7617 ret void 7618} 7619 7620define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { 7621; GFX9-LABEL: test_call_external_void_func_v3i64_inreg: 7622; GFX9: ; %bb.0: 7623; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7624; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7625; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7626; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7627; GFX9-NEXT: v_writelane_b32 v40, s33, 8 7628; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7629; GFX9-NEXT: v_writelane_b32 v40, s5, 1 7630; GFX9-NEXT: v_writelane_b32 v40, s6, 2 7631; GFX9-NEXT: s_mov_b64 s[34:35], 0 7632; GFX9-NEXT: v_writelane_b32 v40, s7, 3 7633; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 7634; GFX9-NEXT: v_writelane_b32 v40, s8, 4 7635; GFX9-NEXT: v_writelane_b32 v40, s9, 5 7636; GFX9-NEXT: s_mov_b32 s33, s32 7637; GFX9-NEXT: s_addk_i32 s32, 0x400 7638; GFX9-NEXT: v_writelane_b32 v40, s30, 6 7639; GFX9-NEXT: s_mov_b32 s8, 1 7640; GFX9-NEXT: s_mov_b32 s9, 2 7641; GFX9-NEXT: v_writelane_b32 v40, s31, 7 7642; GFX9-NEXT: s_getpc_b64 s[34:35] 7643; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 7644; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 7645; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7646; GFX9-NEXT: 
v_readlane_b32 s31, v40, 7 7647; GFX9-NEXT: v_readlane_b32 s30, v40, 6 7648; GFX9-NEXT: v_readlane_b32 s9, v40, 5 7649; GFX9-NEXT: v_readlane_b32 s8, v40, 4 7650; GFX9-NEXT: v_readlane_b32 s7, v40, 3 7651; GFX9-NEXT: v_readlane_b32 s6, v40, 2 7652; GFX9-NEXT: v_readlane_b32 s5, v40, 1 7653; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7654; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7655; GFX9-NEXT: v_readlane_b32 s33, v40, 8 7656; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7657; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7658; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7659; GFX9-NEXT: s_waitcnt vmcnt(0) 7660; GFX9-NEXT: s_setpc_b64 s[30:31] 7661; 7662; GFX10-LABEL: test_call_external_void_func_v3i64_inreg: 7663; GFX10: ; %bb.0: 7664; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7665; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7666; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7667; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7668; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7669; GFX10-NEXT: s_mov_b32 exec_lo, s34 7670; GFX10-NEXT: v_writelane_b32 v40, s33, 8 7671; GFX10-NEXT: s_mov_b64 s[34:35], 0 7672; GFX10-NEXT: s_mov_b32 s33, s32 7673; GFX10-NEXT: s_addk_i32 s32, 0x200 7674; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7675; GFX10-NEXT: v_writelane_b32 v40, s5, 1 7676; GFX10-NEXT: v_writelane_b32 v40, s6, 2 7677; GFX10-NEXT: v_writelane_b32 v40, s7, 3 7678; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 7679; GFX10-NEXT: s_getpc_b64 s[34:35] 7680; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 7681; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 7682; GFX10-NEXT: v_writelane_b32 v40, s8, 4 7683; GFX10-NEXT: s_mov_b32 s8, 1 7684; GFX10-NEXT: v_writelane_b32 v40, s9, 5 7685; GFX10-NEXT: s_mov_b32 s9, 2 7686; GFX10-NEXT: v_writelane_b32 v40, s30, 6 7687; GFX10-NEXT: v_writelane_b32 v40, s31, 7 7688; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7689; GFX10-NEXT: v_readlane_b32 s31, v40, 7 
7690; GFX10-NEXT: v_readlane_b32 s30, v40, 6 7691; GFX10-NEXT: v_readlane_b32 s9, v40, 5 7692; GFX10-NEXT: v_readlane_b32 s8, v40, 4 7693; GFX10-NEXT: v_readlane_b32 s7, v40, 3 7694; GFX10-NEXT: v_readlane_b32 s6, v40, 2 7695; GFX10-NEXT: v_readlane_b32 s5, v40, 1 7696; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7697; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7698; GFX10-NEXT: v_readlane_b32 s33, v40, 8 7699; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7700; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7701; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7702; GFX10-NEXT: s_mov_b32 exec_lo, s34 7703; GFX10-NEXT: s_waitcnt vmcnt(0) 7704; GFX10-NEXT: s_setpc_b64 s[30:31] 7705; 7706; GFX11-LABEL: test_call_external_void_func_v3i64_inreg: 7707; GFX11: ; %bb.0: 7708; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7709; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7710; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7711; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7712; GFX11-NEXT: s_mov_b32 exec_lo, s0 7713; GFX11-NEXT: v_writelane_b32 v40, s33, 8 7714; GFX11-NEXT: s_mov_b64 s[0:1], 0 7715; GFX11-NEXT: s_mov_b32 s33, s32 7716; GFX11-NEXT: s_add_i32 s32, s32, 16 7717; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7718; GFX11-NEXT: v_writelane_b32 v40, s5, 1 7719; GFX11-NEXT: v_writelane_b32 v40, s6, 2 7720; GFX11-NEXT: v_writelane_b32 v40, s7, 3 7721; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 7722; GFX11-NEXT: s_getpc_b64 s[0:1] 7723; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4 7724; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12 7725; GFX11-NEXT: v_writelane_b32 v40, s8, 4 7726; GFX11-NEXT: s_mov_b32 s8, 1 7727; GFX11-NEXT: v_writelane_b32 v40, s9, 5 7728; GFX11-NEXT: s_mov_b32 s9, 2 7729; GFX11-NEXT: v_writelane_b32 v40, s30, 6 7730; GFX11-NEXT: v_writelane_b32 v40, s31, 7 7731; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7732; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7733; GFX11-NEXT: v_readlane_b32 s31, 
v40, 7 7734; GFX11-NEXT: v_readlane_b32 s30, v40, 6 7735; GFX11-NEXT: v_readlane_b32 s9, v40, 5 7736; GFX11-NEXT: v_readlane_b32 s8, v40, 4 7737; GFX11-NEXT: v_readlane_b32 s7, v40, 3 7738; GFX11-NEXT: v_readlane_b32 s6, v40, 2 7739; GFX11-NEXT: v_readlane_b32 s5, v40, 1 7740; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7741; GFX11-NEXT: s_add_i32 s32, s32, -16 7742; GFX11-NEXT: v_readlane_b32 s33, v40, 8 7743; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7744; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7745; GFX11-NEXT: s_mov_b32 exec_lo, s0 7746; GFX11-NEXT: s_waitcnt vmcnt(0) 7747; GFX11-NEXT: s_setpc_b64 s[30:31] 7748; 7749; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg: 7750; GFX10-SCRATCH: ; %bb.0: 7751; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7752; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7753; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7754; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7755; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7756; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7757; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8 7758; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 7759; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7760; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7761; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7762; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 7763; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 7764; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 7765; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 7766; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7767; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4 7768; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12 7769; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 7770; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 7771; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 7772; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 7773; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 6 7774; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 7775; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7776; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 7777; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 7778; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 7779; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 7780; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 7781; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 7782; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 7783; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7784; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7785; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8 7786; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7787; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7788; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7789; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7790; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7791; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7792 %load = load <2 x i64>, <2 x i64> addrspace(4)* null 7793 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> 7794 7795 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val) 7796 ret void 7797} 7798 7799define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { 7800; GFX9-LABEL: test_call_external_void_func_v4i64_inreg: 7801; GFX9: ; %bb.0: 7802; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7803; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7804; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7805; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7806; GFX9-NEXT: v_writelane_b32 v40, s33, 10 7807; GFX9-NEXT: v_writelane_b32 v40, s4, 0 7808; GFX9-NEXT: v_writelane_b32 v40, s5, 1 7809; GFX9-NEXT: v_writelane_b32 v40, s6, 2 7810; GFX9-NEXT: v_writelane_b32 v40, s7, 3 7811; GFX9-NEXT: s_mov_b64 s[34:35], 0 7812; GFX9-NEXT: v_writelane_b32 v40, s8, 4 7813; GFX9-NEXT: s_load_dwordx4 s[4:7], 
s[34:35], 0x0 7814; GFX9-NEXT: v_writelane_b32 v40, s9, 5 7815; GFX9-NEXT: v_writelane_b32 v40, s10, 6 7816; GFX9-NEXT: v_writelane_b32 v40, s11, 7 7817; GFX9-NEXT: s_mov_b32 s33, s32 7818; GFX9-NEXT: s_addk_i32 s32, 0x400 7819; GFX9-NEXT: v_writelane_b32 v40, s30, 8 7820; GFX9-NEXT: s_mov_b32 s8, 1 7821; GFX9-NEXT: s_mov_b32 s9, 2 7822; GFX9-NEXT: s_mov_b32 s10, 3 7823; GFX9-NEXT: s_mov_b32 s11, 4 7824; GFX9-NEXT: v_writelane_b32 v40, s31, 9 7825; GFX9-NEXT: s_getpc_b64 s[34:35] 7826; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 7827; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 7828; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 7829; GFX9-NEXT: v_readlane_b32 s31, v40, 9 7830; GFX9-NEXT: v_readlane_b32 s30, v40, 8 7831; GFX9-NEXT: v_readlane_b32 s11, v40, 7 7832; GFX9-NEXT: v_readlane_b32 s10, v40, 6 7833; GFX9-NEXT: v_readlane_b32 s9, v40, 5 7834; GFX9-NEXT: v_readlane_b32 s8, v40, 4 7835; GFX9-NEXT: v_readlane_b32 s7, v40, 3 7836; GFX9-NEXT: v_readlane_b32 s6, v40, 2 7837; GFX9-NEXT: v_readlane_b32 s5, v40, 1 7838; GFX9-NEXT: v_readlane_b32 s4, v40, 0 7839; GFX9-NEXT: s_addk_i32 s32, 0xfc00 7840; GFX9-NEXT: v_readlane_b32 s33, v40, 10 7841; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 7842; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7843; GFX9-NEXT: s_mov_b64 exec, s[34:35] 7844; GFX9-NEXT: s_waitcnt vmcnt(0) 7845; GFX9-NEXT: s_setpc_b64 s[30:31] 7846; 7847; GFX10-LABEL: test_call_external_void_func_v4i64_inreg: 7848; GFX10: ; %bb.0: 7849; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7850; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7851; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7852; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 7853; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7854; GFX10-NEXT: s_mov_b32 exec_lo, s34 7855; GFX10-NEXT: v_writelane_b32 v40, s33, 10 7856; GFX10-NEXT: s_mov_b64 s[34:35], 0 7857; GFX10-NEXT: s_mov_b32 s33, s32 7858; GFX10-NEXT: 
s_addk_i32 s32, 0x200 7859; GFX10-NEXT: v_writelane_b32 v40, s4, 0 7860; GFX10-NEXT: v_writelane_b32 v40, s5, 1 7861; GFX10-NEXT: v_writelane_b32 v40, s6, 2 7862; GFX10-NEXT: v_writelane_b32 v40, s7, 3 7863; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 7864; GFX10-NEXT: s_getpc_b64 s[34:35] 7865; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 7866; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 7867; GFX10-NEXT: v_writelane_b32 v40, s8, 4 7868; GFX10-NEXT: s_mov_b32 s8, 1 7869; GFX10-NEXT: v_writelane_b32 v40, s9, 5 7870; GFX10-NEXT: s_mov_b32 s9, 2 7871; GFX10-NEXT: v_writelane_b32 v40, s10, 6 7872; GFX10-NEXT: s_mov_b32 s10, 3 7873; GFX10-NEXT: v_writelane_b32 v40, s11, 7 7874; GFX10-NEXT: s_mov_b32 s11, 4 7875; GFX10-NEXT: v_writelane_b32 v40, s30, 8 7876; GFX10-NEXT: v_writelane_b32 v40, s31, 9 7877; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 7878; GFX10-NEXT: v_readlane_b32 s31, v40, 9 7879; GFX10-NEXT: v_readlane_b32 s30, v40, 8 7880; GFX10-NEXT: v_readlane_b32 s11, v40, 7 7881; GFX10-NEXT: v_readlane_b32 s10, v40, 6 7882; GFX10-NEXT: v_readlane_b32 s9, v40, 5 7883; GFX10-NEXT: v_readlane_b32 s8, v40, 4 7884; GFX10-NEXT: v_readlane_b32 s7, v40, 3 7885; GFX10-NEXT: v_readlane_b32 s6, v40, 2 7886; GFX10-NEXT: v_readlane_b32 s5, v40, 1 7887; GFX10-NEXT: v_readlane_b32 s4, v40, 0 7888; GFX10-NEXT: s_addk_i32 s32, 0xfe00 7889; GFX10-NEXT: v_readlane_b32 s33, v40, 10 7890; GFX10-NEXT: s_or_saveexec_b32 s34, -1 7891; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 7892; GFX10-NEXT: s_waitcnt_depctr 0xffe3 7893; GFX10-NEXT: s_mov_b32 exec_lo, s34 7894; GFX10-NEXT: s_waitcnt vmcnt(0) 7895; GFX10-NEXT: s_setpc_b64 s[30:31] 7896; 7897; GFX11-LABEL: test_call_external_void_func_v4i64_inreg: 7898; GFX11: ; %bb.0: 7899; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7900; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7901; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7902; GFX11-NEXT: 
scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 7903; GFX11-NEXT: s_mov_b32 exec_lo, s0 7904; GFX11-NEXT: v_writelane_b32 v40, s33, 10 7905; GFX11-NEXT: s_mov_b64 s[0:1], 0 7906; GFX11-NEXT: s_mov_b32 s33, s32 7907; GFX11-NEXT: s_add_i32 s32, s32, 16 7908; GFX11-NEXT: v_writelane_b32 v40, s4, 0 7909; GFX11-NEXT: v_writelane_b32 v40, s5, 1 7910; GFX11-NEXT: v_writelane_b32 v40, s6, 2 7911; GFX11-NEXT: v_writelane_b32 v40, s7, 3 7912; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 7913; GFX11-NEXT: s_getpc_b64 s[0:1] 7914; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4 7915; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12 7916; GFX11-NEXT: v_writelane_b32 v40, s8, 4 7917; GFX11-NEXT: s_mov_b32 s8, 1 7918; GFX11-NEXT: v_writelane_b32 v40, s9, 5 7919; GFX11-NEXT: s_mov_b32 s9, 2 7920; GFX11-NEXT: v_writelane_b32 v40, s10, 6 7921; GFX11-NEXT: s_mov_b32 s10, 3 7922; GFX11-NEXT: v_writelane_b32 v40, s11, 7 7923; GFX11-NEXT: s_mov_b32 s11, 4 7924; GFX11-NEXT: v_writelane_b32 v40, s30, 8 7925; GFX11-NEXT: v_writelane_b32 v40, s31, 9 7926; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7927; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7928; GFX11-NEXT: v_readlane_b32 s31, v40, 9 7929; GFX11-NEXT: v_readlane_b32 s30, v40, 8 7930; GFX11-NEXT: v_readlane_b32 s11, v40, 7 7931; GFX11-NEXT: v_readlane_b32 s10, v40, 6 7932; GFX11-NEXT: v_readlane_b32 s9, v40, 5 7933; GFX11-NEXT: v_readlane_b32 s8, v40, 4 7934; GFX11-NEXT: v_readlane_b32 s7, v40, 3 7935; GFX11-NEXT: v_readlane_b32 s6, v40, 2 7936; GFX11-NEXT: v_readlane_b32 s5, v40, 1 7937; GFX11-NEXT: v_readlane_b32 s4, v40, 0 7938; GFX11-NEXT: s_add_i32 s32, s32, -16 7939; GFX11-NEXT: v_readlane_b32 s33, v40, 10 7940; GFX11-NEXT: s_or_saveexec_b32 s0, -1 7941; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 7942; GFX11-NEXT: s_mov_b32 exec_lo, s0 7943; GFX11-NEXT: s_waitcnt vmcnt(0) 7944; GFX11-NEXT: s_setpc_b64 s[30:31] 7945; 7946; GFX10-SCRATCH-LABEL: 
test_call_external_void_func_v4i64_inreg: 7947; GFX10-SCRATCH: ; %bb.0: 7948; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7949; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 7950; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7951; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 7952; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7953; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7954; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 7955; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 7956; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 7957; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 7958; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 7959; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 7960; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 7961; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 7962; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 7963; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 7964; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4 7965; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12 7966; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 7967; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 7968; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 7969; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 7970; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 7971; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3 7972; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 7973; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 7974; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 7975; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 7976; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 7977; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 7978; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 7979; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 7980; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 7981; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 7982; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 7983; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s7, v40, 3 7984; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 7985; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 7986; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 7987; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 7988; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 7989; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 7990; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 7991; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 7992; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 7993; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 7994; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 7995 %load = load <2 x i64>, <2 x i64> addrspace(4)* null 7996 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7997 call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val) 7998 ret void 7999} 8000 8001define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { 8002; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg: 8003; GFX9: ; %bb.0: 8004; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8005; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8006; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8007; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8008; GFX9-NEXT: v_writelane_b32 v40, s33, 3 8009; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8010; GFX9-NEXT: s_mov_b32 s33, s32 8011; GFX9-NEXT: s_addk_i32 s32, 0x400 8012; GFX9-NEXT: v_writelane_b32 v40, s30, 1 8013; GFX9-NEXT: s_movk_i32 s4, 0x4400 8014; GFX9-NEXT: v_writelane_b32 v40, s31, 2 8015; GFX9-NEXT: s_getpc_b64 s[34:35] 8016; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 8017; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 8018; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8019; GFX9-NEXT: v_readlane_b32 s31, v40, 2 8020; GFX9-NEXT: v_readlane_b32 s30, v40, 1 8021; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8022; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8023; 
GFX9-NEXT: v_readlane_b32 s33, v40, 3 8024; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8025; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8026; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8027; GFX9-NEXT: s_waitcnt vmcnt(0) 8028; GFX9-NEXT: s_setpc_b64 s[30:31] 8029; 8030; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg: 8031; GFX10: ; %bb.0: 8032; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8033; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8034; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8035; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8036; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8037; GFX10-NEXT: s_mov_b32 exec_lo, s34 8038; GFX10-NEXT: v_writelane_b32 v40, s33, 3 8039; GFX10-NEXT: s_mov_b32 s33, s32 8040; GFX10-NEXT: s_addk_i32 s32, 0x200 8041; GFX10-NEXT: s_getpc_b64 s[34:35] 8042; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 8043; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 8044; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8045; GFX10-NEXT: s_movk_i32 s4, 0x4400 8046; GFX10-NEXT: v_writelane_b32 v40, s30, 1 8047; GFX10-NEXT: v_writelane_b32 v40, s31, 2 8048; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8049; GFX10-NEXT: v_readlane_b32 s31, v40, 2 8050; GFX10-NEXT: v_readlane_b32 s30, v40, 1 8051; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8052; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8053; GFX10-NEXT: v_readlane_b32 s33, v40, 3 8054; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8055; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8056; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8057; GFX10-NEXT: s_mov_b32 exec_lo, s34 8058; GFX10-NEXT: s_waitcnt vmcnt(0) 8059; GFX10-NEXT: s_setpc_b64 s[30:31] 8060; 8061; GFX11-LABEL: test_call_external_void_func_f16_imm_inreg: 8062; GFX11: ; %bb.0: 8063; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8064; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8065; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8066; GFX11-NEXT: 
scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8067; GFX11-NEXT: s_mov_b32 exec_lo, s0 8068; GFX11-NEXT: v_writelane_b32 v40, s33, 3 8069; GFX11-NEXT: s_mov_b32 s33, s32 8070; GFX11-NEXT: s_add_i32 s32, s32, 16 8071; GFX11-NEXT: s_getpc_b64 s[0:1] 8072; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4 8073; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12 8074; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8075; GFX11-NEXT: s_movk_i32 s4, 0x4400 8076; GFX11-NEXT: v_writelane_b32 v40, s30, 1 8077; GFX11-NEXT: v_writelane_b32 v40, s31, 2 8078; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8079; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8080; GFX11-NEXT: v_readlane_b32 s31, v40, 2 8081; GFX11-NEXT: v_readlane_b32 s30, v40, 1 8082; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8083; GFX11-NEXT: s_add_i32 s32, s32, -16 8084; GFX11-NEXT: v_readlane_b32 s33, v40, 3 8085; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8086; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8087; GFX11-NEXT: s_mov_b32 exec_lo, s0 8088; GFX11-NEXT: s_waitcnt vmcnt(0) 8089; GFX11-NEXT: s_setpc_b64 s[30:31] 8090; 8091; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg: 8092; GFX10-SCRATCH: ; %bb.0: 8093; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8094; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8095; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8096; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8097; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8098; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8099; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 8100; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8101; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8102; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8103; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4 8104; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12 8105; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s4, 0 8106; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 8107; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 8108; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 8109; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8110; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 8111; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 8112; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8113; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8114; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 8115; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8116; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8117; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8118; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8119; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8120; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8121 call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0) 8122 ret void 8123} 8124 8125define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { 8126; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg: 8127; GFX9: ; %bb.0: 8128; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8129; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8130; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8131; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8132; GFX9-NEXT: v_writelane_b32 v40, s33, 3 8133; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8134; GFX9-NEXT: s_mov_b32 s33, s32 8135; GFX9-NEXT: s_addk_i32 s32, 0x400 8136; GFX9-NEXT: v_writelane_b32 v40, s30, 1 8137; GFX9-NEXT: s_mov_b32 s4, 4.0 8138; GFX9-NEXT: v_writelane_b32 v40, s31, 2 8139; GFX9-NEXT: s_getpc_b64 s[34:35] 8140; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 8141; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 8142; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8143; GFX9-NEXT: v_readlane_b32 s31, v40, 2 8144; GFX9-NEXT: v_readlane_b32 s30, v40, 1 8145; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8146; GFX9-NEXT: s_addk_i32 s32, 0xfc00 
8147; GFX9-NEXT: v_readlane_b32 s33, v40, 3 8148; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8149; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8150; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8151; GFX9-NEXT: s_waitcnt vmcnt(0) 8152; GFX9-NEXT: s_setpc_b64 s[30:31] 8153; 8154; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg: 8155; GFX10: ; %bb.0: 8156; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8157; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8158; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8159; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8160; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8161; GFX10-NEXT: s_mov_b32 exec_lo, s34 8162; GFX10-NEXT: v_writelane_b32 v40, s33, 3 8163; GFX10-NEXT: s_mov_b32 s33, s32 8164; GFX10-NEXT: s_addk_i32 s32, 0x200 8165; GFX10-NEXT: s_getpc_b64 s[34:35] 8166; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 8167; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 8168; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8169; GFX10-NEXT: s_mov_b32 s4, 4.0 8170; GFX10-NEXT: v_writelane_b32 v40, s30, 1 8171; GFX10-NEXT: v_writelane_b32 v40, s31, 2 8172; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8173; GFX10-NEXT: v_readlane_b32 s31, v40, 2 8174; GFX10-NEXT: v_readlane_b32 s30, v40, 1 8175; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8176; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8177; GFX10-NEXT: v_readlane_b32 s33, v40, 3 8178; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8179; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8180; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8181; GFX10-NEXT: s_mov_b32 exec_lo, s34 8182; GFX10-NEXT: s_waitcnt vmcnt(0) 8183; GFX10-NEXT: s_setpc_b64 s[30:31] 8184; 8185; GFX11-LABEL: test_call_external_void_func_f32_imm_inreg: 8186; GFX11: ; %bb.0: 8187; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8188; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8189; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8190; GFX11-NEXT: 
scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8191; GFX11-NEXT: s_mov_b32 exec_lo, s0 8192; GFX11-NEXT: v_writelane_b32 v40, s33, 3 8193; GFX11-NEXT: s_mov_b32 s33, s32 8194; GFX11-NEXT: s_add_i32 s32, s32, 16 8195; GFX11-NEXT: s_getpc_b64 s[0:1] 8196; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4 8197; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12 8198; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8199; GFX11-NEXT: s_mov_b32 s4, 4.0 8200; GFX11-NEXT: v_writelane_b32 v40, s30, 1 8201; GFX11-NEXT: v_writelane_b32 v40, s31, 2 8202; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8203; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8204; GFX11-NEXT: v_readlane_b32 s31, v40, 2 8205; GFX11-NEXT: v_readlane_b32 s30, v40, 1 8206; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8207; GFX11-NEXT: s_add_i32 s32, s32, -16 8208; GFX11-NEXT: v_readlane_b32 s33, v40, 3 8209; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8210; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8211; GFX11-NEXT: s_mov_b32 exec_lo, s0 8212; GFX11-NEXT: s_waitcnt vmcnt(0) 8213; GFX11-NEXT: s_setpc_b64 s[30:31] 8214; 8215; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg: 8216; GFX10-SCRATCH: ; %bb.0: 8217; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8218; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8219; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8220; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8221; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8222; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8223; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 8224; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8225; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8226; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8227; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4 8228; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12 8229; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s4, 0 8230; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 8231; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 8232; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 8233; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8234; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 8235; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 8236; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8237; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8238; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 8239; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8240; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8241; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8242; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8243; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8244; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8245 call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0) 8246 ret void 8247} 8248 8249define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { 8250; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg: 8251; GFX9: ; %bb.0: 8252; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8253; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8254; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8255; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8256; GFX9-NEXT: v_writelane_b32 v40, s33, 4 8257; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8258; GFX9-NEXT: v_writelane_b32 v40, s5, 1 8259; GFX9-NEXT: s_mov_b32 s33, s32 8260; GFX9-NEXT: s_addk_i32 s32, 0x400 8261; GFX9-NEXT: v_writelane_b32 v40, s30, 2 8262; GFX9-NEXT: s_mov_b32 s4, 1.0 8263; GFX9-NEXT: s_mov_b32 s5, 2.0 8264; GFX9-NEXT: v_writelane_b32 v40, s31, 3 8265; GFX9-NEXT: s_getpc_b64 s[34:35] 8266; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 8267; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 8268; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8269; GFX9-NEXT: v_readlane_b32 s31, v40, 3 8270; GFX9-NEXT: v_readlane_b32 s30, v40, 2 8271; 
GFX9-NEXT: v_readlane_b32 s5, v40, 1 8272; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8273; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8274; GFX9-NEXT: v_readlane_b32 s33, v40, 4 8275; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8276; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8277; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8278; GFX9-NEXT: s_waitcnt vmcnt(0) 8279; GFX9-NEXT: s_setpc_b64 s[30:31] 8280; 8281; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg: 8282; GFX10: ; %bb.0: 8283; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8284; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8285; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8286; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8287; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8288; GFX10-NEXT: s_mov_b32 exec_lo, s34 8289; GFX10-NEXT: v_writelane_b32 v40, s33, 4 8290; GFX10-NEXT: s_mov_b32 s33, s32 8291; GFX10-NEXT: s_addk_i32 s32, 0x200 8292; GFX10-NEXT: s_getpc_b64 s[34:35] 8293; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 8294; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 8295; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8296; GFX10-NEXT: s_mov_b32 s4, 1.0 8297; GFX10-NEXT: v_writelane_b32 v40, s5, 1 8298; GFX10-NEXT: s_mov_b32 s5, 2.0 8299; GFX10-NEXT: v_writelane_b32 v40, s30, 2 8300; GFX10-NEXT: v_writelane_b32 v40, s31, 3 8301; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8302; GFX10-NEXT: v_readlane_b32 s31, v40, 3 8303; GFX10-NEXT: v_readlane_b32 s30, v40, 2 8304; GFX10-NEXT: v_readlane_b32 s5, v40, 1 8305; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8306; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8307; GFX10-NEXT: v_readlane_b32 s33, v40, 4 8308; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8309; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8310; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8311; GFX10-NEXT: s_mov_b32 exec_lo, s34 8312; GFX10-NEXT: s_waitcnt vmcnt(0) 8313; GFX10-NEXT: s_setpc_b64 s[30:31] 8314; 8315; 
GFX11-LABEL: test_call_external_void_func_v2f32_imm_inreg: 8316; GFX11: ; %bb.0: 8317; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8318; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8319; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8320; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8321; GFX11-NEXT: s_mov_b32 exec_lo, s0 8322; GFX11-NEXT: v_writelane_b32 v40, s33, 4 8323; GFX11-NEXT: s_mov_b32 s33, s32 8324; GFX11-NEXT: s_add_i32 s32, s32, 16 8325; GFX11-NEXT: s_getpc_b64 s[0:1] 8326; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4 8327; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12 8328; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8329; GFX11-NEXT: s_mov_b32 s4, 1.0 8330; GFX11-NEXT: v_writelane_b32 v40, s5, 1 8331; GFX11-NEXT: s_mov_b32 s5, 2.0 8332; GFX11-NEXT: v_writelane_b32 v40, s30, 2 8333; GFX11-NEXT: v_writelane_b32 v40, s31, 3 8334; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8335; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8336; GFX11-NEXT: v_readlane_b32 s31, v40, 3 8337; GFX11-NEXT: v_readlane_b32 s30, v40, 2 8338; GFX11-NEXT: v_readlane_b32 s5, v40, 1 8339; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8340; GFX11-NEXT: s_add_i32 s32, s32, -16 8341; GFX11-NEXT: v_readlane_b32 s33, v40, 4 8342; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8343; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8344; GFX11-NEXT: s_mov_b32 exec_lo, s0 8345; GFX11-NEXT: s_waitcnt vmcnt(0) 8346; GFX11-NEXT: s_setpc_b64 s[30:31] 8347; 8348; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg: 8349; GFX10-SCRATCH: ; %bb.0: 8350; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8351; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8352; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8353; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8354; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8355; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8356; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s33, 4 8357; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8358; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8359; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8360; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4 8361; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12 8362; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 8363; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 8364; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 8365; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 8366; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 8367; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 8368; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8369; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 8370; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 8371; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 8372; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8373; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8374; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 8375; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8376; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8377; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8378; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8379; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8380; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8381 call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>) 8382 ret void 8383} 8384 8385define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { 8386; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg: 8387; GFX9: ; %bb.0: 8388; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8389; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8390; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8391; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8392; GFX9-NEXT: v_writelane_b32 v40, s33, 5 8393; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8394; GFX9-NEXT: v_writelane_b32 v40, s5, 1 8395; 
GFX9-NEXT: v_writelane_b32 v40, s6, 2 8396; GFX9-NEXT: s_mov_b32 s33, s32 8397; GFX9-NEXT: s_addk_i32 s32, 0x400 8398; GFX9-NEXT: v_writelane_b32 v40, s30, 3 8399; GFX9-NEXT: s_mov_b32 s4, 1.0 8400; GFX9-NEXT: s_mov_b32 s5, 2.0 8401; GFX9-NEXT: s_mov_b32 s6, 4.0 8402; GFX9-NEXT: v_writelane_b32 v40, s31, 4 8403; GFX9-NEXT: s_getpc_b64 s[34:35] 8404; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 8405; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 8406; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8407; GFX9-NEXT: v_readlane_b32 s31, v40, 4 8408; GFX9-NEXT: v_readlane_b32 s30, v40, 3 8409; GFX9-NEXT: v_readlane_b32 s6, v40, 2 8410; GFX9-NEXT: v_readlane_b32 s5, v40, 1 8411; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8412; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8413; GFX9-NEXT: v_readlane_b32 s33, v40, 5 8414; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8415; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8416; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8417; GFX9-NEXT: s_waitcnt vmcnt(0) 8418; GFX9-NEXT: s_setpc_b64 s[30:31] 8419; 8420; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg: 8421; GFX10: ; %bb.0: 8422; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8423; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8424; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8425; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8426; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8427; GFX10-NEXT: s_mov_b32 exec_lo, s34 8428; GFX10-NEXT: v_writelane_b32 v40, s33, 5 8429; GFX10-NEXT: s_mov_b32 s33, s32 8430; GFX10-NEXT: s_addk_i32 s32, 0x200 8431; GFX10-NEXT: s_getpc_b64 s[34:35] 8432; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 8433; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 8434; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8435; GFX10-NEXT: s_mov_b32 s4, 1.0 8436; GFX10-NEXT: v_writelane_b32 v40, s5, 1 8437; GFX10-NEXT: s_mov_b32 s5, 2.0 8438; 
GFX10-NEXT: v_writelane_b32 v40, s6, 2 8439; GFX10-NEXT: s_mov_b32 s6, 4.0 8440; GFX10-NEXT: v_writelane_b32 v40, s30, 3 8441; GFX10-NEXT: v_writelane_b32 v40, s31, 4 8442; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8443; GFX10-NEXT: v_readlane_b32 s31, v40, 4 8444; GFX10-NEXT: v_readlane_b32 s30, v40, 3 8445; GFX10-NEXT: v_readlane_b32 s6, v40, 2 8446; GFX10-NEXT: v_readlane_b32 s5, v40, 1 8447; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8448; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8449; GFX10-NEXT: v_readlane_b32 s33, v40, 5 8450; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8451; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8452; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8453; GFX10-NEXT: s_mov_b32 exec_lo, s34 8454; GFX10-NEXT: s_waitcnt vmcnt(0) 8455; GFX10-NEXT: s_setpc_b64 s[30:31] 8456; 8457; GFX11-LABEL: test_call_external_void_func_v3f32_imm_inreg: 8458; GFX11: ; %bb.0: 8459; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8460; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8461; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8462; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8463; GFX11-NEXT: s_mov_b32 exec_lo, s0 8464; GFX11-NEXT: v_writelane_b32 v40, s33, 5 8465; GFX11-NEXT: s_mov_b32 s33, s32 8466; GFX11-NEXT: s_add_i32 s32, s32, 16 8467; GFX11-NEXT: s_getpc_b64 s[0:1] 8468; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4 8469; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12 8470; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8471; GFX11-NEXT: s_mov_b32 s4, 1.0 8472; GFX11-NEXT: v_writelane_b32 v40, s5, 1 8473; GFX11-NEXT: s_mov_b32 s5, 2.0 8474; GFX11-NEXT: v_writelane_b32 v40, s6, 2 8475; GFX11-NEXT: s_mov_b32 s6, 4.0 8476; GFX11-NEXT: v_writelane_b32 v40, s30, 3 8477; GFX11-NEXT: v_writelane_b32 v40, s31, 4 8478; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8479; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8480; GFX11-NEXT: v_readlane_b32 s31, v40, 4 8481; GFX11-NEXT: v_readlane_b32 s30, v40, 
3 8482; GFX11-NEXT: v_readlane_b32 s6, v40, 2 8483; GFX11-NEXT: v_readlane_b32 s5, v40, 1 8484; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8485; GFX11-NEXT: s_add_i32 s32, s32, -16 8486; GFX11-NEXT: v_readlane_b32 s33, v40, 5 8487; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8488; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8489; GFX11-NEXT: s_mov_b32 exec_lo, s0 8490; GFX11-NEXT: s_waitcnt vmcnt(0) 8491; GFX11-NEXT: s_setpc_b64 s[30:31] 8492; 8493; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg: 8494; GFX10-SCRATCH: ; %bb.0: 8495; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8496; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8497; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8498; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8499; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8500; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8501; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5 8502; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8503; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8504; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8505; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4 8506; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12 8507; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 8508; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 8509; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 8510; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 8511; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 8512; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 8513; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 8514; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 8515; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8516; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 8517; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 8518; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 8519; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 8520; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8521; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8522; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5 8523; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8524; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8525; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8526; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8527; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8528; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8529 call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>) 8530 ret void 8531} 8532 8533define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { 8534; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg: 8535; GFX9: ; %bb.0: 8536; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8537; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8538; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8539; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8540; GFX9-NEXT: v_writelane_b32 v40, s33, 7 8541; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8542; GFX9-NEXT: v_writelane_b32 v40, s5, 1 8543; GFX9-NEXT: v_writelane_b32 v40, s6, 2 8544; GFX9-NEXT: v_writelane_b32 v40, s7, 3 8545; GFX9-NEXT: v_writelane_b32 v40, s8, 4 8546; GFX9-NEXT: s_mov_b32 s33, s32 8547; GFX9-NEXT: s_addk_i32 s32, 0x400 8548; GFX9-NEXT: v_writelane_b32 v40, s30, 5 8549; GFX9-NEXT: s_mov_b32 s4, 1.0 8550; GFX9-NEXT: s_mov_b32 s5, 2.0 8551; GFX9-NEXT: s_mov_b32 s6, 4.0 8552; GFX9-NEXT: s_mov_b32 s7, -1.0 8553; GFX9-NEXT: s_mov_b32 s8, 0.5 8554; GFX9-NEXT: v_writelane_b32 v40, s31, 6 8555; GFX9-NEXT: s_getpc_b64 s[34:35] 8556; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 8557; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 8558; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8559; GFX9-NEXT: v_readlane_b32 s31, v40, 6 8560; GFX9-NEXT: v_readlane_b32 s30, v40, 5 8561; GFX9-NEXT: v_readlane_b32 s8, v40, 4 8562; GFX9-NEXT: v_readlane_b32 s7, v40, 3 8563; GFX9-NEXT: 
v_readlane_b32 s6, v40, 2 8564; GFX9-NEXT: v_readlane_b32 s5, v40, 1 8565; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8566; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8567; GFX9-NEXT: v_readlane_b32 s33, v40, 7 8568; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8569; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8570; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8571; GFX9-NEXT: s_waitcnt vmcnt(0) 8572; GFX9-NEXT: s_setpc_b64 s[30:31] 8573; 8574; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg: 8575; GFX10: ; %bb.0: 8576; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8577; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8578; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8579; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8580; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8581; GFX10-NEXT: s_mov_b32 exec_lo, s34 8582; GFX10-NEXT: v_writelane_b32 v40, s33, 7 8583; GFX10-NEXT: s_mov_b32 s33, s32 8584; GFX10-NEXT: s_addk_i32 s32, 0x200 8585; GFX10-NEXT: s_getpc_b64 s[34:35] 8586; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 8587; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 8588; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8589; GFX10-NEXT: s_mov_b32 s4, 1.0 8590; GFX10-NEXT: v_writelane_b32 v40, s5, 1 8591; GFX10-NEXT: s_mov_b32 s5, 2.0 8592; GFX10-NEXT: v_writelane_b32 v40, s6, 2 8593; GFX10-NEXT: s_mov_b32 s6, 4.0 8594; GFX10-NEXT: v_writelane_b32 v40, s7, 3 8595; GFX10-NEXT: s_mov_b32 s7, -1.0 8596; GFX10-NEXT: v_writelane_b32 v40, s8, 4 8597; GFX10-NEXT: s_mov_b32 s8, 0.5 8598; GFX10-NEXT: v_writelane_b32 v40, s30, 5 8599; GFX10-NEXT: v_writelane_b32 v40, s31, 6 8600; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8601; GFX10-NEXT: v_readlane_b32 s31, v40, 6 8602; GFX10-NEXT: v_readlane_b32 s30, v40, 5 8603; GFX10-NEXT: v_readlane_b32 s8, v40, 4 8604; GFX10-NEXT: v_readlane_b32 s7, v40, 3 8605; GFX10-NEXT: v_readlane_b32 s6, v40, 2 8606; GFX10-NEXT: v_readlane_b32 s5, v40, 1 8607; GFX10-NEXT: 
v_readlane_b32 s4, v40, 0 8608; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8609; GFX10-NEXT: v_readlane_b32 s33, v40, 7 8610; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8611; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8612; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8613; GFX10-NEXT: s_mov_b32 exec_lo, s34 8614; GFX10-NEXT: s_waitcnt vmcnt(0) 8615; GFX10-NEXT: s_setpc_b64 s[30:31] 8616; 8617; GFX11-LABEL: test_call_external_void_func_v5f32_imm_inreg: 8618; GFX11: ; %bb.0: 8619; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8620; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8621; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8622; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8623; GFX11-NEXT: s_mov_b32 exec_lo, s0 8624; GFX11-NEXT: v_writelane_b32 v40, s33, 7 8625; GFX11-NEXT: s_mov_b32 s33, s32 8626; GFX11-NEXT: s_add_i32 s32, s32, 16 8627; GFX11-NEXT: s_getpc_b64 s[0:1] 8628; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 8629; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12 8630; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8631; GFX11-NEXT: s_mov_b32 s4, 1.0 8632; GFX11-NEXT: v_writelane_b32 v40, s5, 1 8633; GFX11-NEXT: s_mov_b32 s5, 2.0 8634; GFX11-NEXT: v_writelane_b32 v40, s6, 2 8635; GFX11-NEXT: s_mov_b32 s6, 4.0 8636; GFX11-NEXT: v_writelane_b32 v40, s7, 3 8637; GFX11-NEXT: s_mov_b32 s7, -1.0 8638; GFX11-NEXT: v_writelane_b32 v40, s8, 4 8639; GFX11-NEXT: s_mov_b32 s8, 0.5 8640; GFX11-NEXT: v_writelane_b32 v40, s30, 5 8641; GFX11-NEXT: v_writelane_b32 v40, s31, 6 8642; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8643; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8644; GFX11-NEXT: v_readlane_b32 s31, v40, 6 8645; GFX11-NEXT: v_readlane_b32 s30, v40, 5 8646; GFX11-NEXT: v_readlane_b32 s8, v40, 4 8647; GFX11-NEXT: v_readlane_b32 s7, v40, 3 8648; GFX11-NEXT: v_readlane_b32 s6, v40, 2 8649; GFX11-NEXT: v_readlane_b32 s5, v40, 1 8650; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8651; GFX11-NEXT: s_add_i32 
s32, s32, -16 8652; GFX11-NEXT: v_readlane_b32 s33, v40, 7 8653; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8654; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8655; GFX11-NEXT: s_mov_b32 exec_lo, s0 8656; GFX11-NEXT: s_waitcnt vmcnt(0) 8657; GFX11-NEXT: s_setpc_b64 s[30:31] 8658; 8659; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg: 8660; GFX10-SCRATCH: ; %bb.0: 8661; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8662; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8663; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8664; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8665; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8666; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8667; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 8668; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8669; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8670; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8671; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 8672; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12 8673; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 8674; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 8675; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 8676; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 8677; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 8678; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 8679; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 8680; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 8681; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 8682; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 8683; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 8684; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 8685; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8686; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 8687; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 8688; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 8689; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 8690; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 8691; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 8692; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8693; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8694; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 8695; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8696; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8697; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8698; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8699; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8700; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8701 call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) 8702 ret void 8703} 8704 8705define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { 8706; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg: 8707; GFX9: ; %bb.0: 8708; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8709; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8710; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8711; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8712; GFX9-NEXT: v_writelane_b32 v40, s33, 4 8713; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8714; GFX9-NEXT: v_writelane_b32 v40, s5, 1 8715; GFX9-NEXT: s_mov_b32 s33, s32 8716; GFX9-NEXT: s_addk_i32 s32, 0x400 8717; GFX9-NEXT: v_writelane_b32 v40, s30, 2 8718; GFX9-NEXT: s_mov_b32 s4, 0 8719; GFX9-NEXT: s_mov_b32 s5, 0x40100000 8720; GFX9-NEXT: v_writelane_b32 v40, s31, 3 8721; GFX9-NEXT: s_getpc_b64 s[34:35] 8722; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 8723; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 8724; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8725; GFX9-NEXT: v_readlane_b32 s31, v40, 3 8726; GFX9-NEXT: v_readlane_b32 s30, v40, 2 8727; GFX9-NEXT: v_readlane_b32 s5, v40, 1 8728; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8729; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8730; GFX9-NEXT: v_readlane_b32 s33, v40, 4 
8731; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8732; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8733; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8734; GFX9-NEXT: s_waitcnt vmcnt(0) 8735; GFX9-NEXT: s_setpc_b64 s[30:31] 8736; 8737; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg: 8738; GFX10: ; %bb.0: 8739; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8740; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8741; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8742; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8743; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8744; GFX10-NEXT: s_mov_b32 exec_lo, s34 8745; GFX10-NEXT: v_writelane_b32 v40, s33, 4 8746; GFX10-NEXT: s_mov_b32 s33, s32 8747; GFX10-NEXT: s_addk_i32 s32, 0x200 8748; GFX10-NEXT: s_getpc_b64 s[34:35] 8749; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 8750; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 8751; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8752; GFX10-NEXT: s_mov_b32 s4, 0 8753; GFX10-NEXT: v_writelane_b32 v40, s5, 1 8754; GFX10-NEXT: s_mov_b32 s5, 0x40100000 8755; GFX10-NEXT: v_writelane_b32 v40, s30, 2 8756; GFX10-NEXT: v_writelane_b32 v40, s31, 3 8757; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8758; GFX10-NEXT: v_readlane_b32 s31, v40, 3 8759; GFX10-NEXT: v_readlane_b32 s30, v40, 2 8760; GFX10-NEXT: v_readlane_b32 s5, v40, 1 8761; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8762; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8763; GFX10-NEXT: v_readlane_b32 s33, v40, 4 8764; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8765; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8766; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8767; GFX10-NEXT: s_mov_b32 exec_lo, s34 8768; GFX10-NEXT: s_waitcnt vmcnt(0) 8769; GFX10-NEXT: s_setpc_b64 s[30:31] 8770; 8771; GFX11-LABEL: test_call_external_void_func_f64_imm_inreg: 8772; GFX11: ; %bb.0: 8773; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8774; GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 8775; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8776; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8777; GFX11-NEXT: s_mov_b32 exec_lo, s0 8778; GFX11-NEXT: v_writelane_b32 v40, s33, 4 8779; GFX11-NEXT: s_mov_b32 s33, s32 8780; GFX11-NEXT: s_add_i32 s32, s32, 16 8781; GFX11-NEXT: s_getpc_b64 s[0:1] 8782; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 8783; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12 8784; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8785; GFX11-NEXT: s_mov_b32 s4, 0 8786; GFX11-NEXT: v_writelane_b32 v40, s5, 1 8787; GFX11-NEXT: s_mov_b32 s5, 0x40100000 8788; GFX11-NEXT: v_writelane_b32 v40, s30, 2 8789; GFX11-NEXT: v_writelane_b32 v40, s31, 3 8790; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 8791; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8792; GFX11-NEXT: v_readlane_b32 s31, v40, 3 8793; GFX11-NEXT: v_readlane_b32 s30, v40, 2 8794; GFX11-NEXT: v_readlane_b32 s5, v40, 1 8795; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8796; GFX11-NEXT: s_add_i32 s32, s32, -16 8797; GFX11-NEXT: v_readlane_b32 s33, v40, 4 8798; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8799; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8800; GFX11-NEXT: s_mov_b32 exec_lo, s0 8801; GFX11-NEXT: s_waitcnt vmcnt(0) 8802; GFX11-NEXT: s_setpc_b64 s[30:31] 8803; 8804; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg: 8805; GFX10-SCRATCH: ; %bb.0: 8806; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8807; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8808; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8809; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8810; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8811; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8812; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 8813; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8814; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8815; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 
8816; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 8817; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12 8818; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 8819; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 8820; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 8821; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 8822; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 8823; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 8824; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 8825; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 8826; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 8827; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 8828; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8829; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8830; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 8831; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8832; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8833; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8834; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8835; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8836; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8837 call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0) 8838 ret void 8839} 8840 8841define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { 8842; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg: 8843; GFX9: ; %bb.0: 8844; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8845; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8846; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8847; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8848; GFX9-NEXT: v_writelane_b32 v40, s33, 6 8849; GFX9-NEXT: v_writelane_b32 v40, s4, 0 8850; GFX9-NEXT: v_writelane_b32 v40, s5, 1 8851; GFX9-NEXT: v_writelane_b32 v40, s6, 2 8852; GFX9-NEXT: v_writelane_b32 v40, s7, 3 8853; GFX9-NEXT: s_mov_b32 s33, s32 8854; GFX9-NEXT: s_addk_i32 s32, 0x400 8855; GFX9-NEXT: v_writelane_b32 v40, s30, 
4 8856; GFX9-NEXT: s_mov_b32 s4, 0 8857; GFX9-NEXT: s_mov_b32 s5, 2.0 8858; GFX9-NEXT: s_mov_b32 s6, 0 8859; GFX9-NEXT: s_mov_b32 s7, 0x40100000 8860; GFX9-NEXT: v_writelane_b32 v40, s31, 5 8861; GFX9-NEXT: s_getpc_b64 s[34:35] 8862; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 8863; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 8864; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 8865; GFX9-NEXT: v_readlane_b32 s31, v40, 5 8866; GFX9-NEXT: v_readlane_b32 s30, v40, 4 8867; GFX9-NEXT: v_readlane_b32 s7, v40, 3 8868; GFX9-NEXT: v_readlane_b32 s6, v40, 2 8869; GFX9-NEXT: v_readlane_b32 s5, v40, 1 8870; GFX9-NEXT: v_readlane_b32 s4, v40, 0 8871; GFX9-NEXT: s_addk_i32 s32, 0xfc00 8872; GFX9-NEXT: v_readlane_b32 s33, v40, 6 8873; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 8874; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8875; GFX9-NEXT: s_mov_b64 exec, s[34:35] 8876; GFX9-NEXT: s_waitcnt vmcnt(0) 8877; GFX9-NEXT: s_setpc_b64 s[30:31] 8878; 8879; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg: 8880; GFX10: ; %bb.0: 8881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8882; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 8883; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8884; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 8885; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8886; GFX10-NEXT: s_mov_b32 exec_lo, s34 8887; GFX10-NEXT: v_writelane_b32 v40, s33, 6 8888; GFX10-NEXT: s_mov_b32 s33, s32 8889; GFX10-NEXT: s_addk_i32 s32, 0x200 8890; GFX10-NEXT: s_getpc_b64 s[34:35] 8891; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 8892; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 8893; GFX10-NEXT: v_writelane_b32 v40, s4, 0 8894; GFX10-NEXT: s_mov_b32 s4, 0 8895; GFX10-NEXT: v_writelane_b32 v40, s5, 1 8896; GFX10-NEXT: s_mov_b32 s5, 2.0 8897; GFX10-NEXT: v_writelane_b32 v40, s6, 2 8898; GFX10-NEXT: s_mov_b32 s6, 0 8899; 
GFX10-NEXT: v_writelane_b32 v40, s7, 3 8900; GFX10-NEXT: s_mov_b32 s7, 0x40100000 8901; GFX10-NEXT: v_writelane_b32 v40, s30, 4 8902; GFX10-NEXT: v_writelane_b32 v40, s31, 5 8903; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 8904; GFX10-NEXT: v_readlane_b32 s31, v40, 5 8905; GFX10-NEXT: v_readlane_b32 s30, v40, 4 8906; GFX10-NEXT: v_readlane_b32 s7, v40, 3 8907; GFX10-NEXT: v_readlane_b32 s6, v40, 2 8908; GFX10-NEXT: v_readlane_b32 s5, v40, 1 8909; GFX10-NEXT: v_readlane_b32 s4, v40, 0 8910; GFX10-NEXT: s_addk_i32 s32, 0xfe00 8911; GFX10-NEXT: v_readlane_b32 s33, v40, 6 8912; GFX10-NEXT: s_or_saveexec_b32 s34, -1 8913; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 8914; GFX10-NEXT: s_waitcnt_depctr 0xffe3 8915; GFX10-NEXT: s_mov_b32 exec_lo, s34 8916; GFX10-NEXT: s_waitcnt vmcnt(0) 8917; GFX10-NEXT: s_setpc_b64 s[30:31] 8918; 8919; GFX11-LABEL: test_call_external_void_func_v2f64_imm_inreg: 8920; GFX11: ; %bb.0: 8921; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8922; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8923; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8924; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 8925; GFX11-NEXT: s_mov_b32 exec_lo, s0 8926; GFX11-NEXT: v_writelane_b32 v40, s33, 6 8927; GFX11-NEXT: s_mov_b32 s33, s32 8928; GFX11-NEXT: s_add_i32 s32, s32, 16 8929; GFX11-NEXT: s_getpc_b64 s[0:1] 8930; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 8931; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 8932; GFX11-NEXT: v_writelane_b32 v40, s4, 0 8933; GFX11-NEXT: s_mov_b32 s4, 0 8934; GFX11-NEXT: v_writelane_b32 v40, s5, 1 8935; GFX11-NEXT: s_mov_b32 s5, 2.0 8936; GFX11-NEXT: v_writelane_b32 v40, s6, 2 8937; GFX11-NEXT: s_mov_b32 s6, 0 8938; GFX11-NEXT: v_writelane_b32 v40, s7, 3 8939; GFX11-NEXT: s_mov_b32 s7, 0x40100000 8940; GFX11-NEXT: v_writelane_b32 v40, s30, 4 8941; GFX11-NEXT: v_writelane_b32 v40, s31, 5 8942; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 
8943; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8944; GFX11-NEXT: v_readlane_b32 s31, v40, 5 8945; GFX11-NEXT: v_readlane_b32 s30, v40, 4 8946; GFX11-NEXT: v_readlane_b32 s7, v40, 3 8947; GFX11-NEXT: v_readlane_b32 s6, v40, 2 8948; GFX11-NEXT: v_readlane_b32 s5, v40, 1 8949; GFX11-NEXT: v_readlane_b32 s4, v40, 0 8950; GFX11-NEXT: s_add_i32 s32, s32, -16 8951; GFX11-NEXT: v_readlane_b32 s33, v40, 6 8952; GFX11-NEXT: s_or_saveexec_b32 s0, -1 8953; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 8954; GFX11-NEXT: s_mov_b32 exec_lo, s0 8955; GFX11-NEXT: s_waitcnt vmcnt(0) 8956; GFX11-NEXT: s_setpc_b64 s[30:31] 8957; 8958; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg: 8959; GFX10-SCRATCH: ; %bb.0: 8960; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8961; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 8962; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8963; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 8964; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8965; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8966; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 8967; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 8968; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 8969; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 8970; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 8971; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 8972; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 8973; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 8974; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 8975; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 8976; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 8977; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 8978; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 8979; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 8980; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 8981; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 8982; GFX10-SCRATCH-NEXT: s_swappc_b64 
s[30:31], s[0:1] 8983; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 8984; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 8985; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 8986; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 8987; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 8988; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 8989; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 8990; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 8991; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 8992; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 8993; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 8994; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 8995; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 8996; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 8997 call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>) 8998 ret void 8999} 9000 9001define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { 9002; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg: 9003; GFX9: ; %bb.0: 9004; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9005; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9006; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9007; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9008; GFX9-NEXT: v_writelane_b32 v40, s33, 8 9009; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9010; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9011; GFX9-NEXT: v_writelane_b32 v40, s6, 2 9012; GFX9-NEXT: v_writelane_b32 v40, s7, 3 9013; GFX9-NEXT: v_writelane_b32 v40, s8, 4 9014; GFX9-NEXT: v_writelane_b32 v40, s9, 5 9015; GFX9-NEXT: s_mov_b32 s33, s32 9016; GFX9-NEXT: s_addk_i32 s32, 0x400 9017; GFX9-NEXT: v_writelane_b32 v40, s30, 6 9018; GFX9-NEXT: s_mov_b32 s4, 0 9019; GFX9-NEXT: s_mov_b32 s5, 2.0 9020; GFX9-NEXT: s_mov_b32 s6, 0 9021; GFX9-NEXT: s_mov_b32 s7, 0x40100000 9022; GFX9-NEXT: s_mov_b32 s8, 0 9023; GFX9-NEXT: s_mov_b32 s9, 0x40200000 9024; GFX9-NEXT: v_writelane_b32 v40, s31, 7 9025; GFX9-NEXT: s_getpc_b64 
s[34:35] 9026; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 9027; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 9028; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9029; GFX9-NEXT: v_readlane_b32 s31, v40, 7 9030; GFX9-NEXT: v_readlane_b32 s30, v40, 6 9031; GFX9-NEXT: v_readlane_b32 s9, v40, 5 9032; GFX9-NEXT: v_readlane_b32 s8, v40, 4 9033; GFX9-NEXT: v_readlane_b32 s7, v40, 3 9034; GFX9-NEXT: v_readlane_b32 s6, v40, 2 9035; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9036; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9037; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9038; GFX9-NEXT: v_readlane_b32 s33, v40, 8 9039; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9040; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9041; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9042; GFX9-NEXT: s_waitcnt vmcnt(0) 9043; GFX9-NEXT: s_setpc_b64 s[30:31] 9044; 9045; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg: 9046; GFX10: ; %bb.0: 9047; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9048; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9049; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9050; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9051; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9052; GFX10-NEXT: s_mov_b32 exec_lo, s34 9053; GFX10-NEXT: v_writelane_b32 v40, s33, 8 9054; GFX10-NEXT: s_mov_b32 s33, s32 9055; GFX10-NEXT: s_addk_i32 s32, 0x200 9056; GFX10-NEXT: s_getpc_b64 s[34:35] 9057; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 9058; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 9059; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9060; GFX10-NEXT: s_mov_b32 s4, 0 9061; GFX10-NEXT: v_writelane_b32 v40, s5, 1 9062; GFX10-NEXT: s_mov_b32 s5, 2.0 9063; GFX10-NEXT: v_writelane_b32 v40, s6, 2 9064; GFX10-NEXT: s_mov_b32 s6, 0 9065; GFX10-NEXT: v_writelane_b32 v40, s7, 3 9066; GFX10-NEXT: s_mov_b32 s7, 0x40100000 9067; GFX10-NEXT: v_writelane_b32 v40, s8, 4 9068; 
GFX10-NEXT: s_mov_b32 s8, 0 9069; GFX10-NEXT: v_writelane_b32 v40, s9, 5 9070; GFX10-NEXT: s_mov_b32 s9, 0x40200000 9071; GFX10-NEXT: v_writelane_b32 v40, s30, 6 9072; GFX10-NEXT: v_writelane_b32 v40, s31, 7 9073; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9074; GFX10-NEXT: v_readlane_b32 s31, v40, 7 9075; GFX10-NEXT: v_readlane_b32 s30, v40, 6 9076; GFX10-NEXT: v_readlane_b32 s9, v40, 5 9077; GFX10-NEXT: v_readlane_b32 s8, v40, 4 9078; GFX10-NEXT: v_readlane_b32 s7, v40, 3 9079; GFX10-NEXT: v_readlane_b32 s6, v40, 2 9080; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9081; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9082; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9083; GFX10-NEXT: v_readlane_b32 s33, v40, 8 9084; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9085; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9086; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9087; GFX10-NEXT: s_mov_b32 exec_lo, s34 9088; GFX10-NEXT: s_waitcnt vmcnt(0) 9089; GFX10-NEXT: s_setpc_b64 s[30:31] 9090; 9091; GFX11-LABEL: test_call_external_void_func_v3f64_imm_inreg: 9092; GFX11: ; %bb.0: 9093; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9094; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9095; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9096; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9097; GFX11-NEXT: s_mov_b32 exec_lo, s0 9098; GFX11-NEXT: v_writelane_b32 v40, s33, 8 9099; GFX11-NEXT: s_mov_b32 s33, s32 9100; GFX11-NEXT: s_add_i32 s32, s32, 16 9101; GFX11-NEXT: s_getpc_b64 s[0:1] 9102; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 9103; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 9104; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9105; GFX11-NEXT: s_mov_b32 s4, 0 9106; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9107; GFX11-NEXT: s_mov_b32 s5, 2.0 9108; GFX11-NEXT: v_writelane_b32 v40, s6, 2 9109; GFX11-NEXT: s_mov_b32 s6, 0 9110; GFX11-NEXT: v_writelane_b32 v40, s7, 3 9111; GFX11-NEXT: s_mov_b32 s7, 0x40100000 9112; GFX11-NEXT: 
v_writelane_b32 v40, s8, 4 9113; GFX11-NEXT: s_mov_b32 s8, 0 9114; GFX11-NEXT: v_writelane_b32 v40, s9, 5 9115; GFX11-NEXT: s_mov_b32 s9, 0x40200000 9116; GFX11-NEXT: v_writelane_b32 v40, s30, 6 9117; GFX11-NEXT: v_writelane_b32 v40, s31, 7 9118; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9119; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9120; GFX11-NEXT: v_readlane_b32 s31, v40, 7 9121; GFX11-NEXT: v_readlane_b32 s30, v40, 6 9122; GFX11-NEXT: v_readlane_b32 s9, v40, 5 9123; GFX11-NEXT: v_readlane_b32 s8, v40, 4 9124; GFX11-NEXT: v_readlane_b32 s7, v40, 3 9125; GFX11-NEXT: v_readlane_b32 s6, v40, 2 9126; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9127; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9128; GFX11-NEXT: s_add_i32 s32, s32, -16 9129; GFX11-NEXT: v_readlane_b32 s33, v40, 8 9130; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9131; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9132; GFX11-NEXT: s_mov_b32 exec_lo, s0 9133; GFX11-NEXT: s_waitcnt vmcnt(0) 9134; GFX11-NEXT: s_setpc_b64 s[30:31] 9135; 9136; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg: 9137; GFX10-SCRATCH: ; %bb.0: 9138; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9139; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9140; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9141; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9142; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9143; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9144; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8 9145; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9146; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9147; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9148; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 9149; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 9150; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9151; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 9152; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9153; 
GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 9154; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 9155; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 9156; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 9157; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 9158; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 9159; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 9160; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 9161; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 9162; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 9163; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 9164; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9165; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 9166; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 9167; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 9168; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 9169; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 9170; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 9171; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9172; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9173; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9174; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8 9175; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9176; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9177; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9178; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9179; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9180; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9181 call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>) 9182 ret void 9183} 9184 9185define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { 9186; GFX9-LABEL: test_call_external_void_func_v2i16_inreg: 9187; GFX9: ; %bb.0: 9188; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9189; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9190; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9191; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9192; GFX9-NEXT: 
v_writelane_b32 v40, s33, 3 9193; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9194; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 9195; GFX9-NEXT: s_mov_b32 s33, s32 9196; GFX9-NEXT: s_addk_i32 s32, 0x400 9197; GFX9-NEXT: v_writelane_b32 v40, s30, 1 9198; GFX9-NEXT: v_writelane_b32 v40, s31, 2 9199; GFX9-NEXT: s_getpc_b64 s[34:35] 9200; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 9201; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 9202; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9203; GFX9-NEXT: v_readlane_b32 s31, v40, 2 9204; GFX9-NEXT: v_readlane_b32 s30, v40, 1 9205; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9206; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9207; GFX9-NEXT: v_readlane_b32 s33, v40, 3 9208; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9209; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9210; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9211; GFX9-NEXT: s_waitcnt vmcnt(0) 9212; GFX9-NEXT: s_setpc_b64 s[30:31] 9213; 9214; GFX10-LABEL: test_call_external_void_func_v2i16_inreg: 9215; GFX10: ; %bb.0: 9216; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9217; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9218; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9219; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9220; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9221; GFX10-NEXT: s_mov_b32 exec_lo, s34 9222; GFX10-NEXT: v_writelane_b32 v40, s33, 3 9223; GFX10-NEXT: s_mov_b32 s33, s32 9224; GFX10-NEXT: s_addk_i32 s32, 0x200 9225; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9226; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 9227; GFX10-NEXT: s_getpc_b64 s[34:35] 9228; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 9229; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 9230; GFX10-NEXT: v_writelane_b32 v40, s30, 1 9231; GFX10-NEXT: v_writelane_b32 v40, s31, 2 9232; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9233; GFX10-NEXT: v_readlane_b32 s31, v40, 2 
9234; GFX10-NEXT: v_readlane_b32 s30, v40, 1 9235; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9236; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9237; GFX10-NEXT: v_readlane_b32 s33, v40, 3 9238; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9239; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9240; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9241; GFX10-NEXT: s_mov_b32 exec_lo, s34 9242; GFX10-NEXT: s_waitcnt vmcnt(0) 9243; GFX10-NEXT: s_setpc_b64 s[30:31] 9244; 9245; GFX11-LABEL: test_call_external_void_func_v2i16_inreg: 9246; GFX11: ; %bb.0: 9247; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9248; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9249; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9250; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9251; GFX11-NEXT: s_mov_b32 exec_lo, s0 9252; GFX11-NEXT: v_writelane_b32 v40, s33, 3 9253; GFX11-NEXT: s_mov_b32 s33, s32 9254; GFX11-NEXT: s_add_i32 s32, s32, 16 9255; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9256; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 9257; GFX11-NEXT: s_getpc_b64 s[0:1] 9258; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 9259; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 9260; GFX11-NEXT: v_writelane_b32 v40, s30, 1 9261; GFX11-NEXT: v_writelane_b32 v40, s31, 2 9262; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9263; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9264; GFX11-NEXT: v_readlane_b32 s31, v40, 2 9265; GFX11-NEXT: v_readlane_b32 s30, v40, 1 9266; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9267; GFX11-NEXT: s_add_i32 s32, s32, -16 9268; GFX11-NEXT: v_readlane_b32 s33, v40, 3 9269; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9270; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9271; GFX11-NEXT: s_mov_b32 exec_lo, s0 9272; GFX11-NEXT: s_waitcnt vmcnt(0) 9273; GFX11-NEXT: s_setpc_b64 s[30:31] 9274; 9275; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg: 9276; GFX10-SCRATCH: ; %bb.0: 9277; GFX10-SCRATCH-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 9278; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9279; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9280; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9281; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9282; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9283; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 9284; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9285; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9286; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9287; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 9288; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9289; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 9290; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 9291; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 9292; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 9293; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9294; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 9295; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 9296; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9297; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9298; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 9299; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9300; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9301; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9302; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9303; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9304; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9305 %val = load <2 x i16>, <2 x i16> addrspace(4)* undef 9306 call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val) 9307 ret void 9308} 9309 9310define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { 9311; GFX9-LABEL: test_call_external_void_func_v3i16_inreg: 9312; GFX9: ; %bb.0: 9313; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9314; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9315; GFX9-NEXT: buffer_store_dword 
v40, off, s[0:3], s32 ; 4-byte Folded Spill 9316; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9317; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9318; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9319; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9320; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9321; GFX9-NEXT: s_mov_b32 s33, s32 9322; GFX9-NEXT: s_addk_i32 s32, 0x400 9323; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9324; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9325; GFX9-NEXT: s_getpc_b64 s[34:35] 9326; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 9327; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 9328; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9329; GFX9-NEXT: v_readlane_b32 s31, v40, 3 9330; GFX9-NEXT: v_readlane_b32 s30, v40, 2 9331; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9332; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9333; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9334; GFX9-NEXT: v_readlane_b32 s33, v40, 4 9335; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9336; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9337; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9338; GFX9-NEXT: s_waitcnt vmcnt(0) 9339; GFX9-NEXT: s_setpc_b64 s[30:31] 9340; 9341; GFX10-LABEL: test_call_external_void_func_v3i16_inreg: 9342; GFX10: ; %bb.0: 9343; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9344; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9345; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9346; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9347; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9348; GFX10-NEXT: s_mov_b32 exec_lo, s34 9349; GFX10-NEXT: v_writelane_b32 v40, s33, 4 9350; GFX10-NEXT: s_mov_b32 s33, s32 9351; GFX10-NEXT: s_addk_i32 s32, 0x200 9352; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9353; GFX10-NEXT: v_writelane_b32 v40, s5, 1 9354; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9355; GFX10-NEXT: s_getpc_b64 s[34:35] 9356; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 9357; GFX10-NEXT: 
s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 9358; GFX10-NEXT: v_writelane_b32 v40, s30, 2 9359; GFX10-NEXT: v_writelane_b32 v40, s31, 3 9360; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9361; GFX10-NEXT: v_readlane_b32 s31, v40, 3 9362; GFX10-NEXT: v_readlane_b32 s30, v40, 2 9363; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9364; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9365; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9366; GFX10-NEXT: v_readlane_b32 s33, v40, 4 9367; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9368; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9369; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9370; GFX10-NEXT: s_mov_b32 exec_lo, s34 9371; GFX10-NEXT: s_waitcnt vmcnt(0) 9372; GFX10-NEXT: s_setpc_b64 s[30:31] 9373; 9374; GFX11-LABEL: test_call_external_void_func_v3i16_inreg: 9375; GFX11: ; %bb.0: 9376; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9377; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9378; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9379; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9380; GFX11-NEXT: s_mov_b32 exec_lo, s0 9381; GFX11-NEXT: v_writelane_b32 v40, s33, 4 9382; GFX11-NEXT: s_mov_b32 s33, s32 9383; GFX11-NEXT: s_add_i32 s32, s32, 16 9384; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9385; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9386; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 9387; GFX11-NEXT: s_getpc_b64 s[0:1] 9388; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 9389; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 9390; GFX11-NEXT: v_writelane_b32 v40, s30, 2 9391; GFX11-NEXT: v_writelane_b32 v40, s31, 3 9392; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9393; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9394; GFX11-NEXT: v_readlane_b32 s31, v40, 3 9395; GFX11-NEXT: v_readlane_b32 s30, v40, 2 9396; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9397; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9398; GFX11-NEXT: s_add_i32 s32, s32, -16 9399; GFX11-NEXT: 
v_readlane_b32 s33, v40, 4 9400; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9401; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9402; GFX11-NEXT: s_mov_b32 exec_lo, s0 9403; GFX11-NEXT: s_waitcnt vmcnt(0) 9404; GFX11-NEXT: s_setpc_b64 s[30:31] 9405; 9406; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg: 9407; GFX10-SCRATCH: ; %bb.0: 9408; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9409; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9410; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9411; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9412; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9413; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9414; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 9415; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9416; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9417; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9418; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9419; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 9420; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9421; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 9422; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 9423; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 9424; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 9425; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9426; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 9427; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 9428; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9429; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9430; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9431; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 9432; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9433; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9434; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9435; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9436; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 
9437; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9438 %val = load <3 x i16>, <3 x i16> addrspace(4)* undef 9439 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val) 9440 ret void 9441} 9442 9443define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { 9444; GFX9-LABEL: test_call_external_void_func_v3f16_inreg: 9445; GFX9: ; %bb.0: 9446; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9447; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9448; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9449; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9450; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9451; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9452; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9453; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9454; GFX9-NEXT: s_mov_b32 s33, s32 9455; GFX9-NEXT: s_addk_i32 s32, 0x400 9456; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9457; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9458; GFX9-NEXT: s_getpc_b64 s[34:35] 9459; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 9460; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 9461; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9462; GFX9-NEXT: v_readlane_b32 s31, v40, 3 9463; GFX9-NEXT: v_readlane_b32 s30, v40, 2 9464; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9465; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9466; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9467; GFX9-NEXT: v_readlane_b32 s33, v40, 4 9468; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9469; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9470; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9471; GFX9-NEXT: s_waitcnt vmcnt(0) 9472; GFX9-NEXT: s_setpc_b64 s[30:31] 9473; 9474; GFX10-LABEL: test_call_external_void_func_v3f16_inreg: 9475; GFX10: ; %bb.0: 9476; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9477; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9478; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9479; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte 
Folded Spill 9480; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9481; GFX10-NEXT: s_mov_b32 exec_lo, s34 9482; GFX10-NEXT: v_writelane_b32 v40, s33, 4 9483; GFX10-NEXT: s_mov_b32 s33, s32 9484; GFX10-NEXT: s_addk_i32 s32, 0x200 9485; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9486; GFX10-NEXT: v_writelane_b32 v40, s5, 1 9487; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9488; GFX10-NEXT: s_getpc_b64 s[34:35] 9489; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 9490; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 9491; GFX10-NEXT: v_writelane_b32 v40, s30, 2 9492; GFX10-NEXT: v_writelane_b32 v40, s31, 3 9493; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9494; GFX10-NEXT: v_readlane_b32 s31, v40, 3 9495; GFX10-NEXT: v_readlane_b32 s30, v40, 2 9496; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9497; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9498; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9499; GFX10-NEXT: v_readlane_b32 s33, v40, 4 9500; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9501; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9502; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9503; GFX10-NEXT: s_mov_b32 exec_lo, s34 9504; GFX10-NEXT: s_waitcnt vmcnt(0) 9505; GFX10-NEXT: s_setpc_b64 s[30:31] 9506; 9507; GFX11-LABEL: test_call_external_void_func_v3f16_inreg: 9508; GFX11: ; %bb.0: 9509; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9510; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9511; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9512; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9513; GFX11-NEXT: s_mov_b32 exec_lo, s0 9514; GFX11-NEXT: v_writelane_b32 v40, s33, 4 9515; GFX11-NEXT: s_mov_b32 s33, s32 9516; GFX11-NEXT: s_add_i32 s32, s32, 16 9517; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9518; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9519; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 9520; GFX11-NEXT: s_getpc_b64 s[0:1] 9521; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 9522; GFX11-NEXT: 
s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 9523; GFX11-NEXT: v_writelane_b32 v40, s30, 2 9524; GFX11-NEXT: v_writelane_b32 v40, s31, 3 9525; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9526; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9527; GFX11-NEXT: v_readlane_b32 s31, v40, 3 9528; GFX11-NEXT: v_readlane_b32 s30, v40, 2 9529; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9530; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9531; GFX11-NEXT: s_add_i32 s32, s32, -16 9532; GFX11-NEXT: v_readlane_b32 s33, v40, 4 9533; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9534; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9535; GFX11-NEXT: s_mov_b32 exec_lo, s0 9536; GFX11-NEXT: s_waitcnt vmcnt(0) 9537; GFX11-NEXT: s_setpc_b64 s[30:31] 9538; 9539; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg: 9540; GFX10-SCRATCH: ; %bb.0: 9541; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9542; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9543; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9544; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9545; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9546; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9547; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 9548; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9549; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9550; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9551; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9552; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 9553; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9554; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 9555; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 9556; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 9557; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 9558; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9559; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 9560; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, 
v40, 2 9561; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9562; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9563; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9564; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 9565; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9566; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9567; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9568; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9569; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9570; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9571 %val = load <3 x half>, <3 x half> addrspace(4)* undef 9572 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val) 9573 ret void 9574} 9575 9576define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { 9577; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg: 9578; GFX9: ; %bb.0: 9579; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9580; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9581; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9582; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9583; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9584; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9585; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9586; GFX9-NEXT: s_mov_b32 s33, s32 9587; GFX9-NEXT: s_addk_i32 s32, 0x400 9588; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9589; GFX9-NEXT: s_mov_b32 s4, 0x20001 9590; GFX9-NEXT: s_mov_b32 s5, 3 9591; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9592; GFX9-NEXT: s_getpc_b64 s[34:35] 9593; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 9594; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 9595; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9596; GFX9-NEXT: v_readlane_b32 s31, v40, 3 9597; GFX9-NEXT: v_readlane_b32 s30, v40, 2 9598; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9599; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9600; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9601; GFX9-NEXT: v_readlane_b32 s33, v40, 4 9602; GFX9-NEXT: 
s_or_saveexec_b64 s[34:35], -1 9603; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9604; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9605; GFX9-NEXT: s_waitcnt vmcnt(0) 9606; GFX9-NEXT: s_setpc_b64 s[30:31] 9607; 9608; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg: 9609; GFX10: ; %bb.0: 9610; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9611; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9612; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9613; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9614; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9615; GFX10-NEXT: s_mov_b32 exec_lo, s34 9616; GFX10-NEXT: v_writelane_b32 v40, s33, 4 9617; GFX10-NEXT: s_mov_b32 s33, s32 9618; GFX10-NEXT: s_addk_i32 s32, 0x200 9619; GFX10-NEXT: s_getpc_b64 s[34:35] 9620; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 9621; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 9622; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9623; GFX10-NEXT: s_mov_b32 s4, 0x20001 9624; GFX10-NEXT: v_writelane_b32 v40, s5, 1 9625; GFX10-NEXT: s_mov_b32 s5, 3 9626; GFX10-NEXT: v_writelane_b32 v40, s30, 2 9627; GFX10-NEXT: v_writelane_b32 v40, s31, 3 9628; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9629; GFX10-NEXT: v_readlane_b32 s31, v40, 3 9630; GFX10-NEXT: v_readlane_b32 s30, v40, 2 9631; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9632; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9633; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9634; GFX10-NEXT: v_readlane_b32 s33, v40, 4 9635; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9636; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9637; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9638; GFX10-NEXT: s_mov_b32 exec_lo, s34 9639; GFX10-NEXT: s_waitcnt vmcnt(0) 9640; GFX10-NEXT: s_setpc_b64 s[30:31] 9641; 9642; GFX11-LABEL: test_call_external_void_func_v3i16_imm_inreg: 9643; GFX11: ; %bb.0: 9644; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9645; GFX11-NEXT: s_waitcnt_vscnt null, 
0x0 9646; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9647; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9648; GFX11-NEXT: s_mov_b32 exec_lo, s0 9649; GFX11-NEXT: v_writelane_b32 v40, s33, 4 9650; GFX11-NEXT: s_mov_b32 s33, s32 9651; GFX11-NEXT: s_add_i32 s32, s32, 16 9652; GFX11-NEXT: s_getpc_b64 s[0:1] 9653; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 9654; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 9655; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9656; GFX11-NEXT: s_mov_b32 s4, 0x20001 9657; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9658; GFX11-NEXT: s_mov_b32 s5, 3 9659; GFX11-NEXT: v_writelane_b32 v40, s30, 2 9660; GFX11-NEXT: v_writelane_b32 v40, s31, 3 9661; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9662; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9663; GFX11-NEXT: v_readlane_b32 s31, v40, 3 9664; GFX11-NEXT: v_readlane_b32 s30, v40, 2 9665; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9666; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9667; GFX11-NEXT: s_add_i32 s32, s32, -16 9668; GFX11-NEXT: v_readlane_b32 s33, v40, 4 9669; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9670; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9671; GFX11-NEXT: s_mov_b32 exec_lo, s0 9672; GFX11-NEXT: s_waitcnt vmcnt(0) 9673; GFX11-NEXT: s_setpc_b64 s[30:31] 9674; 9675; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg: 9676; GFX10-SCRATCH: ; %bb.0: 9677; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9678; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9679; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9680; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9681; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9682; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9683; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 9684; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9685; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9686; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9687; 
GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 9688; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 9689; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9690; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 9691; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9692; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 9693; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 9694; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 9695; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9696; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 9697; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 9698; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9699; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9700; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9701; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 9702; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9703; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9704; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9705; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9706; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9707; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9708 call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>) 9709 ret void 9710} 9711 9712define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { 9713; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg: 9714; GFX9: ; %bb.0: 9715; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9716; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9717; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9718; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9719; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9720; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9721; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9722; GFX9-NEXT: s_mov_b32 s33, s32 9723; GFX9-NEXT: s_addk_i32 s32, 0x400 9724; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9725; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 9726; GFX9-NEXT: 
s_movk_i32 s5, 0x4400 9727; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9728; GFX9-NEXT: s_getpc_b64 s[34:35] 9729; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 9730; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 9731; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9732; GFX9-NEXT: v_readlane_b32 s31, v40, 3 9733; GFX9-NEXT: v_readlane_b32 s30, v40, 2 9734; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9735; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9736; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9737; GFX9-NEXT: v_readlane_b32 s33, v40, 4 9738; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9739; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9740; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9741; GFX9-NEXT: s_waitcnt vmcnt(0) 9742; GFX9-NEXT: s_setpc_b64 s[30:31] 9743; 9744; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg: 9745; GFX10: ; %bb.0: 9746; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9747; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9748; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9749; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9750; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9751; GFX10-NEXT: s_mov_b32 exec_lo, s34 9752; GFX10-NEXT: v_writelane_b32 v40, s33, 4 9753; GFX10-NEXT: s_mov_b32 s33, s32 9754; GFX10-NEXT: s_addk_i32 s32, 0x200 9755; GFX10-NEXT: s_getpc_b64 s[34:35] 9756; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 9757; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 9758; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9759; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 9760; GFX10-NEXT: v_writelane_b32 v40, s5, 1 9761; GFX10-NEXT: s_movk_i32 s5, 0x4400 9762; GFX10-NEXT: v_writelane_b32 v40, s30, 2 9763; GFX10-NEXT: v_writelane_b32 v40, s31, 3 9764; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9765; GFX10-NEXT: v_readlane_b32 s31, v40, 3 9766; GFX10-NEXT: v_readlane_b32 s30, v40, 2 9767; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9768; 
GFX10-NEXT: v_readlane_b32 s4, v40, 0 9769; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9770; GFX10-NEXT: v_readlane_b32 s33, v40, 4 9771; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9772; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9773; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9774; GFX10-NEXT: s_mov_b32 exec_lo, s34 9775; GFX10-NEXT: s_waitcnt vmcnt(0) 9776; GFX10-NEXT: s_setpc_b64 s[30:31] 9777; 9778; GFX11-LABEL: test_call_external_void_func_v3f16_imm_inreg: 9779; GFX11: ; %bb.0: 9780; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9781; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9782; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9783; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9784; GFX11-NEXT: s_mov_b32 exec_lo, s0 9785; GFX11-NEXT: v_writelane_b32 v40, s33, 4 9786; GFX11-NEXT: s_mov_b32 s33, s32 9787; GFX11-NEXT: s_add_i32 s32, s32, 16 9788; GFX11-NEXT: s_getpc_b64 s[0:1] 9789; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 9790; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 9791; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9792; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 9793; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9794; GFX11-NEXT: s_movk_i32 s5, 0x4400 9795; GFX11-NEXT: v_writelane_b32 v40, s30, 2 9796; GFX11-NEXT: v_writelane_b32 v40, s31, 3 9797; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9798; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9799; GFX11-NEXT: v_readlane_b32 s31, v40, 3 9800; GFX11-NEXT: v_readlane_b32 s30, v40, 2 9801; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9802; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9803; GFX11-NEXT: s_add_i32 s32, s32, -16 9804; GFX11-NEXT: v_readlane_b32 s33, v40, 4 9805; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9806; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9807; GFX11-NEXT: s_mov_b32 exec_lo, s0 9808; GFX11-NEXT: s_waitcnt vmcnt(0) 9809; GFX11-NEXT: s_setpc_b64 s[30:31] 9810; 9811; GFX10-SCRATCH-LABEL: 
test_call_external_void_func_v3f16_imm_inreg: 9812; GFX10-SCRATCH: ; %bb.0: 9813; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9814; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9815; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9816; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9817; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9818; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9819; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 9820; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9821; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9822; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9823; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 9824; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 9825; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9826; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 9827; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9828; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 9829; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 9830; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 9831; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9832; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 9833; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 9834; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9835; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9836; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9837; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 9838; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9839; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9840; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9841; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9842; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9843; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9844 call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>) 9845 ret void 9846} 9847 9848define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() 
#0 { 9849; GFX9-LABEL: test_call_external_void_func_v4i16_inreg: 9850; GFX9: ; %bb.0: 9851; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9852; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9853; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9854; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9855; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9856; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9857; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9858; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9859; GFX9-NEXT: s_mov_b32 s33, s32 9860; GFX9-NEXT: s_addk_i32 s32, 0x400 9861; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9862; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9863; GFX9-NEXT: s_getpc_b64 s[34:35] 9864; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 9865; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 9866; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 9867; GFX9-NEXT: v_readlane_b32 s31, v40, 3 9868; GFX9-NEXT: v_readlane_b32 s30, v40, 2 9869; GFX9-NEXT: v_readlane_b32 s5, v40, 1 9870; GFX9-NEXT: v_readlane_b32 s4, v40, 0 9871; GFX9-NEXT: s_addk_i32 s32, 0xfc00 9872; GFX9-NEXT: v_readlane_b32 s33, v40, 4 9873; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9874; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9875; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9876; GFX9-NEXT: s_waitcnt vmcnt(0) 9877; GFX9-NEXT: s_setpc_b64 s[30:31] 9878; 9879; GFX10-LABEL: test_call_external_void_func_v4i16_inreg: 9880; GFX10: ; %bb.0: 9881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9882; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 9883; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9884; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9885; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9886; GFX10-NEXT: s_mov_b32 exec_lo, s34 9887; GFX10-NEXT: v_writelane_b32 v40, s33, 4 9888; GFX10-NEXT: s_mov_b32 s33, s32 9889; GFX10-NEXT: s_addk_i32 s32, 0x200 9890; GFX10-NEXT: v_writelane_b32 v40, s4, 0 9891; GFX10-NEXT: 
v_writelane_b32 v40, s5, 1 9892; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 9893; GFX10-NEXT: s_getpc_b64 s[34:35] 9894; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 9895; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 9896; GFX10-NEXT: v_writelane_b32 v40, s30, 2 9897; GFX10-NEXT: v_writelane_b32 v40, s31, 3 9898; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 9899; GFX10-NEXT: v_readlane_b32 s31, v40, 3 9900; GFX10-NEXT: v_readlane_b32 s30, v40, 2 9901; GFX10-NEXT: v_readlane_b32 s5, v40, 1 9902; GFX10-NEXT: v_readlane_b32 s4, v40, 0 9903; GFX10-NEXT: s_addk_i32 s32, 0xfe00 9904; GFX10-NEXT: v_readlane_b32 s33, v40, 4 9905; GFX10-NEXT: s_or_saveexec_b32 s34, -1 9906; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 9907; GFX10-NEXT: s_waitcnt_depctr 0xffe3 9908; GFX10-NEXT: s_mov_b32 exec_lo, s34 9909; GFX10-NEXT: s_waitcnt vmcnt(0) 9910; GFX10-NEXT: s_setpc_b64 s[30:31] 9911; 9912; GFX11-LABEL: test_call_external_void_func_v4i16_inreg: 9913; GFX11: ; %bb.0: 9914; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9915; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 9916; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9917; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 9918; GFX11-NEXT: s_mov_b32 exec_lo, s0 9919; GFX11-NEXT: v_writelane_b32 v40, s33, 4 9920; GFX11-NEXT: s_mov_b32 s33, s32 9921; GFX11-NEXT: s_add_i32 s32, s32, 16 9922; GFX11-NEXT: v_writelane_b32 v40, s4, 0 9923; GFX11-NEXT: v_writelane_b32 v40, s5, 1 9924; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 9925; GFX11-NEXT: s_getpc_b64 s[0:1] 9926; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 9927; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 9928; GFX11-NEXT: v_writelane_b32 v40, s30, 2 9929; GFX11-NEXT: v_writelane_b32 v40, s31, 3 9930; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 9931; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 9932; GFX11-NEXT: v_readlane_b32 
s31, v40, 3 9933; GFX11-NEXT: v_readlane_b32 s30, v40, 2 9934; GFX11-NEXT: v_readlane_b32 s5, v40, 1 9935; GFX11-NEXT: v_readlane_b32 s4, v40, 0 9936; GFX11-NEXT: s_add_i32 s32, s32, -16 9937; GFX11-NEXT: v_readlane_b32 s33, v40, 4 9938; GFX11-NEXT: s_or_saveexec_b32 s0, -1 9939; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 9940; GFX11-NEXT: s_mov_b32 exec_lo, s0 9941; GFX11-NEXT: s_waitcnt vmcnt(0) 9942; GFX11-NEXT: s_setpc_b64 s[30:31] 9943; 9944; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg: 9945; GFX10-SCRATCH: ; %bb.0: 9946; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9947; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 9948; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9949; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 9950; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9951; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9952; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 9953; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 9954; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 9955; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 9956; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 9957; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 9958; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 9959; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 9960; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 9961; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 9962; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 9963; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 9964; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 9965; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 9966; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 9967; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 9968; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 9969; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 9970; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 9971; 
GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 9972; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 9973; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 9974; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 9975; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 9976 %val = load <4 x i16>, <4 x i16> addrspace(4)* undef 9977 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val) 9978 ret void 9979} 9980 9981define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { 9982; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg: 9983; GFX9: ; %bb.0: 9984; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9985; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 9986; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 9987; GFX9-NEXT: s_mov_b64 exec, s[34:35] 9988; GFX9-NEXT: v_writelane_b32 v40, s33, 4 9989; GFX9-NEXT: v_writelane_b32 v40, s4, 0 9990; GFX9-NEXT: v_writelane_b32 v40, s5, 1 9991; GFX9-NEXT: s_mov_b32 s33, s32 9992; GFX9-NEXT: s_addk_i32 s32, 0x400 9993; GFX9-NEXT: v_writelane_b32 v40, s30, 2 9994; GFX9-NEXT: s_mov_b32 s4, 0x20001 9995; GFX9-NEXT: s_mov_b32 s5, 0x40003 9996; GFX9-NEXT: v_writelane_b32 v40, s31, 3 9997; GFX9-NEXT: s_getpc_b64 s[34:35] 9998; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 9999; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 10000; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10001; GFX9-NEXT: v_readlane_b32 s31, v40, 3 10002; GFX9-NEXT: v_readlane_b32 s30, v40, 2 10003; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10004; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10005; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10006; GFX9-NEXT: v_readlane_b32 s33, v40, 4 10007; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10008; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10009; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10010; GFX9-NEXT: s_waitcnt vmcnt(0) 10011; GFX9-NEXT: s_setpc_b64 s[30:31] 10012; 10013; GFX10-LABEL: 
test_call_external_void_func_v4i16_imm_inreg: 10014; GFX10: ; %bb.0: 10015; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10016; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10017; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10018; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10019; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10020; GFX10-NEXT: s_mov_b32 exec_lo, s34 10021; GFX10-NEXT: v_writelane_b32 v40, s33, 4 10022; GFX10-NEXT: s_mov_b32 s33, s32 10023; GFX10-NEXT: s_addk_i32 s32, 0x200 10024; GFX10-NEXT: s_getpc_b64 s[34:35] 10025; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 10026; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 10027; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10028; GFX10-NEXT: s_mov_b32 s4, 0x20001 10029; GFX10-NEXT: v_writelane_b32 v40, s5, 1 10030; GFX10-NEXT: s_mov_b32 s5, 0x40003 10031; GFX10-NEXT: v_writelane_b32 v40, s30, 2 10032; GFX10-NEXT: v_writelane_b32 v40, s31, 3 10033; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10034; GFX10-NEXT: v_readlane_b32 s31, v40, 3 10035; GFX10-NEXT: v_readlane_b32 s30, v40, 2 10036; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10037; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10038; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10039; GFX10-NEXT: v_readlane_b32 s33, v40, 4 10040; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10041; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10042; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10043; GFX10-NEXT: s_mov_b32 exec_lo, s34 10044; GFX10-NEXT: s_waitcnt vmcnt(0) 10045; GFX10-NEXT: s_setpc_b64 s[30:31] 10046; 10047; GFX11-LABEL: test_call_external_void_func_v4i16_imm_inreg: 10048; GFX11: ; %bb.0: 10049; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10050; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10051; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10052; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10053; GFX11-NEXT: s_mov_b32 exec_lo, s0 10054; GFX11-NEXT: v_writelane_b32 v40, s33, 4 
10055; GFX11-NEXT: s_mov_b32 s33, s32 10056; GFX11-NEXT: s_add_i32 s32, s32, 16 10057; GFX11-NEXT: s_getpc_b64 s[0:1] 10058; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 10059; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 10060; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10061; GFX11-NEXT: s_mov_b32 s4, 0x20001 10062; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10063; GFX11-NEXT: s_mov_b32 s5, 0x40003 10064; GFX11-NEXT: v_writelane_b32 v40, s30, 2 10065; GFX11-NEXT: v_writelane_b32 v40, s31, 3 10066; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10067; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10068; GFX11-NEXT: v_readlane_b32 s31, v40, 3 10069; GFX11-NEXT: v_readlane_b32 s30, v40, 2 10070; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10071; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10072; GFX11-NEXT: s_add_i32 s32, s32, -16 10073; GFX11-NEXT: v_readlane_b32 s33, v40, 4 10074; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10075; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10076; GFX11-NEXT: s_mov_b32 exec_lo, s0 10077; GFX11-NEXT: s_waitcnt vmcnt(0) 10078; GFX11-NEXT: s_setpc_b64 s[30:31] 10079; 10080; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg: 10081; GFX10-SCRATCH: ; %bb.0: 10082; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10083; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10084; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10085; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10086; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10087; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10088; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 10089; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10090; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10091; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10092; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 10093; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 10094; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10095; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 10096; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10097; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 10098; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 10099; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 10100; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10101; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 10102; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 10103; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10104; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10105; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10106; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 10107; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10108; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10109; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10110; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10111; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10112; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10113 call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>) 10114 ret void 10115} 10116 10117define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { 10118; GFX9-LABEL: test_call_external_void_func_v2f16_inreg: 10119; GFX9: ; %bb.0: 10120; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10121; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10122; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10123; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10124; GFX9-NEXT: v_writelane_b32 v40, s33, 3 10125; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10126; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 10127; GFX9-NEXT: s_mov_b32 s33, s32 10128; GFX9-NEXT: s_addk_i32 s32, 0x400 10129; GFX9-NEXT: v_writelane_b32 v40, s30, 1 10130; GFX9-NEXT: v_writelane_b32 v40, s31, 2 10131; GFX9-NEXT: s_getpc_b64 s[34:35] 10132; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 10133; GFX9-NEXT: s_addc_u32 s35, 
s35, external_void_func_v2f16_inreg@rel32@hi+12 10134; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10135; GFX9-NEXT: v_readlane_b32 s31, v40, 2 10136; GFX9-NEXT: v_readlane_b32 s30, v40, 1 10137; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10138; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10139; GFX9-NEXT: v_readlane_b32 s33, v40, 3 10140; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10141; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10142; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10143; GFX9-NEXT: s_waitcnt vmcnt(0) 10144; GFX9-NEXT: s_setpc_b64 s[30:31] 10145; 10146; GFX10-LABEL: test_call_external_void_func_v2f16_inreg: 10147; GFX10: ; %bb.0: 10148; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10149; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10150; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10151; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10152; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10153; GFX10-NEXT: s_mov_b32 exec_lo, s34 10154; GFX10-NEXT: v_writelane_b32 v40, s33, 3 10155; GFX10-NEXT: s_mov_b32 s33, s32 10156; GFX10-NEXT: s_addk_i32 s32, 0x200 10157; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10158; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 10159; GFX10-NEXT: s_getpc_b64 s[34:35] 10160; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 10161; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 10162; GFX10-NEXT: v_writelane_b32 v40, s30, 1 10163; GFX10-NEXT: v_writelane_b32 v40, s31, 2 10164; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10165; GFX10-NEXT: v_readlane_b32 s31, v40, 2 10166; GFX10-NEXT: v_readlane_b32 s30, v40, 1 10167; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10168; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10169; GFX10-NEXT: v_readlane_b32 s33, v40, 3 10170; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10171; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10172; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10173; GFX10-NEXT: s_mov_b32 exec_lo, s34 10174; 
GFX10-NEXT: s_waitcnt vmcnt(0) 10175; GFX10-NEXT: s_setpc_b64 s[30:31] 10176; 10177; GFX11-LABEL: test_call_external_void_func_v2f16_inreg: 10178; GFX11: ; %bb.0: 10179; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10180; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10181; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10182; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10183; GFX11-NEXT: s_mov_b32 exec_lo, s0 10184; GFX11-NEXT: v_writelane_b32 v40, s33, 3 10185; GFX11-NEXT: s_mov_b32 s33, s32 10186; GFX11-NEXT: s_add_i32 s32, s32, 16 10187; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10188; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 10189; GFX11-NEXT: s_getpc_b64 s[0:1] 10190; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 10191; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 10192; GFX11-NEXT: v_writelane_b32 v40, s30, 1 10193; GFX11-NEXT: v_writelane_b32 v40, s31, 2 10194; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10195; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10196; GFX11-NEXT: v_readlane_b32 s31, v40, 2 10197; GFX11-NEXT: v_readlane_b32 s30, v40, 1 10198; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10199; GFX11-NEXT: s_add_i32 s32, s32, -16 10200; GFX11-NEXT: v_readlane_b32 s33, v40, 3 10201; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10202; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10203; GFX11-NEXT: s_mov_b32 exec_lo, s0 10204; GFX11-NEXT: s_waitcnt vmcnt(0) 10205; GFX11-NEXT: s_setpc_b64 s[30:31] 10206; 10207; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg: 10208; GFX10-SCRATCH: ; %bb.0: 10209; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10210; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10211; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10212; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10213; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10214; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10215; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s33, 3 10216; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10217; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10218; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10219; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 10220; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10221; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 10222; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 10223; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 10224; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 10225; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10226; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 10227; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 10228; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10229; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10230; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 10231; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10232; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10233; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10234; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10235; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10236; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10237 %val = load <2 x half>, <2 x half> addrspace(4)* undef 10238 call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val) 10239 ret void 10240} 10241 10242define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { 10243; GFX9-LABEL: test_call_external_void_func_v2i32_inreg: 10244; GFX9: ; %bb.0: 10245; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10246; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10247; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10248; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10249; GFX9-NEXT: v_writelane_b32 v40, s33, 4 10250; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10251; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10252; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 10253; GFX9-NEXT: s_mov_b32 
s33, s32 10254; GFX9-NEXT: s_addk_i32 s32, 0x400 10255; GFX9-NEXT: v_writelane_b32 v40, s30, 2 10256; GFX9-NEXT: v_writelane_b32 v40, s31, 3 10257; GFX9-NEXT: s_getpc_b64 s[34:35] 10258; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 10259; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 10260; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10261; GFX9-NEXT: v_readlane_b32 s31, v40, 3 10262; GFX9-NEXT: v_readlane_b32 s30, v40, 2 10263; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10264; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10265; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10266; GFX9-NEXT: v_readlane_b32 s33, v40, 4 10267; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10268; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10269; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10270; GFX9-NEXT: s_waitcnt vmcnt(0) 10271; GFX9-NEXT: s_setpc_b64 s[30:31] 10272; 10273; GFX10-LABEL: test_call_external_void_func_v2i32_inreg: 10274; GFX10: ; %bb.0: 10275; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10276; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10277; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10278; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10279; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10280; GFX10-NEXT: s_mov_b32 exec_lo, s34 10281; GFX10-NEXT: v_writelane_b32 v40, s33, 4 10282; GFX10-NEXT: s_mov_b32 s33, s32 10283; GFX10-NEXT: s_addk_i32 s32, 0x200 10284; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10285; GFX10-NEXT: v_writelane_b32 v40, s5, 1 10286; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 10287; GFX10-NEXT: s_getpc_b64 s[34:35] 10288; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 10289; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 10290; GFX10-NEXT: v_writelane_b32 v40, s30, 2 10291; GFX10-NEXT: v_writelane_b32 v40, s31, 3 10292; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10293; GFX10-NEXT: v_readlane_b32 s31, v40, 3 10294; 
GFX10-NEXT: v_readlane_b32 s30, v40, 2 10295; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10296; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10297; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10298; GFX10-NEXT: v_readlane_b32 s33, v40, 4 10299; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10300; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10301; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10302; GFX10-NEXT: s_mov_b32 exec_lo, s34 10303; GFX10-NEXT: s_waitcnt vmcnt(0) 10304; GFX10-NEXT: s_setpc_b64 s[30:31] 10305; 10306; GFX11-LABEL: test_call_external_void_func_v2i32_inreg: 10307; GFX11: ; %bb.0: 10308; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10309; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10310; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10311; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10312; GFX11-NEXT: s_mov_b32 exec_lo, s0 10313; GFX11-NEXT: v_writelane_b32 v40, s33, 4 10314; GFX11-NEXT: s_mov_b32 s33, s32 10315; GFX11-NEXT: s_add_i32 s32, s32, 16 10316; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10317; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10318; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 10319; GFX11-NEXT: s_getpc_b64 s[0:1] 10320; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 10321; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 10322; GFX11-NEXT: v_writelane_b32 v40, s30, 2 10323; GFX11-NEXT: v_writelane_b32 v40, s31, 3 10324; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10325; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10326; GFX11-NEXT: v_readlane_b32 s31, v40, 3 10327; GFX11-NEXT: v_readlane_b32 s30, v40, 2 10328; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10329; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10330; GFX11-NEXT: s_add_i32 s32, s32, -16 10331; GFX11-NEXT: v_readlane_b32 s33, v40, 4 10332; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10333; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10334; GFX11-NEXT: s_mov_b32 exec_lo, s0 10335; GFX11-NEXT: s_waitcnt vmcnt(0) 10336; 
GFX11-NEXT: s_setpc_b64 s[30:31] 10337; 10338; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg: 10339; GFX10-SCRATCH: ; %bb.0: 10340; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10341; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10342; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10343; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10344; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10345; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10346; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 10347; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10348; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10349; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10350; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10351; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10352; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10353; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 10354; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 10355; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 10356; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 10357; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10358; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 10359; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 10360; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10361; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10362; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10363; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 10364; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10365; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10366; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10367; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10368; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10369; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10370 %val = load <2 x i32>, <2 x i32> addrspace(4)* undef 10371 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val) 10372 
ret void 10373} 10374 10375define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { 10376; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg: 10377; GFX9: ; %bb.0: 10378; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10379; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10380; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10381; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10382; GFX9-NEXT: v_writelane_b32 v40, s33, 4 10383; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10384; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10385; GFX9-NEXT: s_mov_b32 s33, s32 10386; GFX9-NEXT: s_addk_i32 s32, 0x400 10387; GFX9-NEXT: v_writelane_b32 v40, s30, 2 10388; GFX9-NEXT: s_mov_b32 s4, 1 10389; GFX9-NEXT: s_mov_b32 s5, 2 10390; GFX9-NEXT: v_writelane_b32 v40, s31, 3 10391; GFX9-NEXT: s_getpc_b64 s[34:35] 10392; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 10393; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 10394; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10395; GFX9-NEXT: v_readlane_b32 s31, v40, 3 10396; GFX9-NEXT: v_readlane_b32 s30, v40, 2 10397; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10398; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10399; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10400; GFX9-NEXT: v_readlane_b32 s33, v40, 4 10401; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10402; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10403; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10404; GFX9-NEXT: s_waitcnt vmcnt(0) 10405; GFX9-NEXT: s_setpc_b64 s[30:31] 10406; 10407; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg: 10408; GFX10: ; %bb.0: 10409; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10410; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10411; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10412; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10413; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10414; GFX10-NEXT: s_mov_b32 exec_lo, s34 10415; GFX10-NEXT: 
v_writelane_b32 v40, s33, 4 10416; GFX10-NEXT: s_mov_b32 s33, s32 10417; GFX10-NEXT: s_addk_i32 s32, 0x200 10418; GFX10-NEXT: s_getpc_b64 s[34:35] 10419; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 10420; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 10421; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10422; GFX10-NEXT: s_mov_b32 s4, 1 10423; GFX10-NEXT: v_writelane_b32 v40, s5, 1 10424; GFX10-NEXT: s_mov_b32 s5, 2 10425; GFX10-NEXT: v_writelane_b32 v40, s30, 2 10426; GFX10-NEXT: v_writelane_b32 v40, s31, 3 10427; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10428; GFX10-NEXT: v_readlane_b32 s31, v40, 3 10429; GFX10-NEXT: v_readlane_b32 s30, v40, 2 10430; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10431; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10432; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10433; GFX10-NEXT: v_readlane_b32 s33, v40, 4 10434; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10435; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10436; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10437; GFX10-NEXT: s_mov_b32 exec_lo, s34 10438; GFX10-NEXT: s_waitcnt vmcnt(0) 10439; GFX10-NEXT: s_setpc_b64 s[30:31] 10440; 10441; GFX11-LABEL: test_call_external_void_func_v2i32_imm_inreg: 10442; GFX11: ; %bb.0: 10443; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10444; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10445; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10446; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10447; GFX11-NEXT: s_mov_b32 exec_lo, s0 10448; GFX11-NEXT: v_writelane_b32 v40, s33, 4 10449; GFX11-NEXT: s_mov_b32 s33, s32 10450; GFX11-NEXT: s_add_i32 s32, s32, 16 10451; GFX11-NEXT: s_getpc_b64 s[0:1] 10452; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 10453; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 10454; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10455; GFX11-NEXT: s_mov_b32 s4, 1 10456; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10457; 
GFX11-NEXT: s_mov_b32 s5, 2 10458; GFX11-NEXT: v_writelane_b32 v40, s30, 2 10459; GFX11-NEXT: v_writelane_b32 v40, s31, 3 10460; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10461; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10462; GFX11-NEXT: v_readlane_b32 s31, v40, 3 10463; GFX11-NEXT: v_readlane_b32 s30, v40, 2 10464; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10465; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10466; GFX11-NEXT: s_add_i32 s32, s32, -16 10467; GFX11-NEXT: v_readlane_b32 s33, v40, 4 10468; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10469; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10470; GFX11-NEXT: s_mov_b32 exec_lo, s0 10471; GFX11-NEXT: s_waitcnt vmcnt(0) 10472; GFX11-NEXT: s_setpc_b64 s[30:31] 10473; 10474; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg: 10475; GFX10-SCRATCH: ; %bb.0: 10476; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10477; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10478; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10479; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10480; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10481; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10482; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 10483; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10484; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10485; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10486; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 10487; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 10488; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10489; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 10490; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10491; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 10492; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 10493; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 10494; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10495; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 10496; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 10497; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10498; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10499; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10500; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 10501; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10502; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10503; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10504; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10505; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10506; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10507 call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>) 10508 ret void 10509} 10510 10511define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { 10512; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg: 10513; GFX9: ; %bb.0: 10514; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10515; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10516; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10517; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10518; GFX9-NEXT: v_writelane_b32 v40, s33, 5 10519; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10520; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10521; GFX9-NEXT: v_writelane_b32 v40, s6, 2 10522; GFX9-NEXT: s_mov_b32 s33, s32 10523; GFX9-NEXT: s_addk_i32 s32, 0x400 10524; GFX9-NEXT: v_writelane_b32 v40, s30, 3 10525; GFX9-NEXT: s_mov_b32 s4, 3 10526; GFX9-NEXT: s_mov_b32 s5, 4 10527; GFX9-NEXT: s_mov_b32 s6, 5 10528; GFX9-NEXT: v_writelane_b32 v40, s31, 4 10529; GFX9-NEXT: s_getpc_b64 s[34:35] 10530; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 10531; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 10532; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10533; GFX9-NEXT: v_readlane_b32 s31, v40, 4 10534; GFX9-NEXT: v_readlane_b32 s30, v40, 3 10535; GFX9-NEXT: v_readlane_b32 s6, v40, 2 10536; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10537; 
GFX9-NEXT: v_readlane_b32 s4, v40, 0 10538; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10539; GFX9-NEXT: v_readlane_b32 s33, v40, 5 10540; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10541; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10542; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10543; GFX9-NEXT: s_waitcnt vmcnt(0) 10544; GFX9-NEXT: s_setpc_b64 s[30:31] 10545; 10546; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg: 10547; GFX10: ; %bb.0: 10548; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10549; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10550; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10551; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10552; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10553; GFX10-NEXT: s_mov_b32 exec_lo, s34 10554; GFX10-NEXT: v_writelane_b32 v40, s33, 5 10555; GFX10-NEXT: s_mov_b32 s33, s32 10556; GFX10-NEXT: s_addk_i32 s32, 0x200 10557; GFX10-NEXT: s_getpc_b64 s[34:35] 10558; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 10559; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 10560; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10561; GFX10-NEXT: s_mov_b32 s4, 3 10562; GFX10-NEXT: v_writelane_b32 v40, s5, 1 10563; GFX10-NEXT: s_mov_b32 s5, 4 10564; GFX10-NEXT: v_writelane_b32 v40, s6, 2 10565; GFX10-NEXT: s_mov_b32 s6, 5 10566; GFX10-NEXT: v_writelane_b32 v40, s30, 3 10567; GFX10-NEXT: v_writelane_b32 v40, s31, 4 10568; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10569; GFX10-NEXT: v_readlane_b32 s31, v40, 4 10570; GFX10-NEXT: v_readlane_b32 s30, v40, 3 10571; GFX10-NEXT: v_readlane_b32 s6, v40, 2 10572; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10573; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10574; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10575; GFX10-NEXT: v_readlane_b32 s33, v40, 5 10576; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10577; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10578; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10579; 
GFX10-NEXT: s_mov_b32 exec_lo, s34 10580; GFX10-NEXT: s_waitcnt vmcnt(0) 10581; GFX10-NEXT: s_setpc_b64 s[30:31] 10582; 10583; GFX11-LABEL: test_call_external_void_func_v3i32_imm_inreg: 10584; GFX11: ; %bb.0: 10585; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10586; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10587; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10588; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10589; GFX11-NEXT: s_mov_b32 exec_lo, s0 10590; GFX11-NEXT: v_writelane_b32 v40, s33, 5 10591; GFX11-NEXT: s_mov_b32 s33, s32 10592; GFX11-NEXT: s_add_i32 s32, s32, 16 10593; GFX11-NEXT: s_getpc_b64 s[0:1] 10594; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 10595; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 10596; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10597; GFX11-NEXT: s_mov_b32 s4, 3 10598; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10599; GFX11-NEXT: s_mov_b32 s5, 4 10600; GFX11-NEXT: v_writelane_b32 v40, s6, 2 10601; GFX11-NEXT: s_mov_b32 s6, 5 10602; GFX11-NEXT: v_writelane_b32 v40, s30, 3 10603; GFX11-NEXT: v_writelane_b32 v40, s31, 4 10604; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10605; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10606; GFX11-NEXT: v_readlane_b32 s31, v40, 4 10607; GFX11-NEXT: v_readlane_b32 s30, v40, 3 10608; GFX11-NEXT: v_readlane_b32 s6, v40, 2 10609; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10610; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10611; GFX11-NEXT: s_add_i32 s32, s32, -16 10612; GFX11-NEXT: v_readlane_b32 s33, v40, 5 10613; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10614; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10615; GFX11-NEXT: s_mov_b32 exec_lo, s0 10616; GFX11-NEXT: s_waitcnt vmcnt(0) 10617; GFX11-NEXT: s_setpc_b64 s[30:31] 10618; 10619; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg: 10620; GFX10-SCRATCH: ; %bb.0: 10621; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10622; GFX10-SCRATCH-NEXT: 
s_waitcnt_vscnt null, 0x0 10623; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10624; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10625; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10626; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10627; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5 10628; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10629; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10630; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10631; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 10632; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 10633; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10634; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 10635; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10636; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 10637; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 10638; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 10639; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 10640; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 10641; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10642; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 10643; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 10644; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 10645; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10646; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10647; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10648; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5 10649; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10650; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10651; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10652; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10653; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10654; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10655 call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>) 10656 ret void 10657} 10658 10659define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) 
#0 { 10660; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg: 10661; GFX9: ; %bb.0: 10662; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10663; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10664; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10665; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10666; GFX9-NEXT: v_writelane_b32 v40, s33, 6 10667; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10668; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10669; GFX9-NEXT: v_writelane_b32 v40, s6, 2 10670; GFX9-NEXT: v_writelane_b32 v40, s7, 3 10671; GFX9-NEXT: s_mov_b32 s33, s32 10672; GFX9-NEXT: s_addk_i32 s32, 0x400 10673; GFX9-NEXT: v_writelane_b32 v40, s30, 4 10674; GFX9-NEXT: s_mov_b32 s4, 3 10675; GFX9-NEXT: s_mov_b32 s5, 4 10676; GFX9-NEXT: s_mov_b32 s6, 5 10677; GFX9-NEXT: s_mov_b32 s7, 6 10678; GFX9-NEXT: v_writelane_b32 v40, s31, 5 10679; GFX9-NEXT: s_getpc_b64 s[34:35] 10680; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 10681; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 10682; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10683; GFX9-NEXT: v_readlane_b32 s31, v40, 5 10684; GFX9-NEXT: v_readlane_b32 s30, v40, 4 10685; GFX9-NEXT: v_readlane_b32 s7, v40, 3 10686; GFX9-NEXT: v_readlane_b32 s6, v40, 2 10687; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10688; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10689; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10690; GFX9-NEXT: v_readlane_b32 s33, v40, 6 10691; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10692; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10693; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10694; GFX9-NEXT: s_waitcnt vmcnt(0) 10695; GFX9-NEXT: s_setpc_b64 s[30:31] 10696; 10697; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg: 10698; GFX10: ; %bb.0: 10699; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10700; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10701; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10702; GFX10-NEXT: buffer_store_dword 
v40, off, s[0:3], s32 ; 4-byte Folded Spill 10703; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10704; GFX10-NEXT: s_mov_b32 exec_lo, s34 10705; GFX10-NEXT: v_writelane_b32 v40, s33, 6 10706; GFX10-NEXT: s_mov_b32 s33, s32 10707; GFX10-NEXT: s_addk_i32 s32, 0x200 10708; GFX10-NEXT: s_getpc_b64 s[34:35] 10709; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 10710; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 10711; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10712; GFX10-NEXT: s_mov_b32 s4, 3 10713; GFX10-NEXT: v_writelane_b32 v40, s5, 1 10714; GFX10-NEXT: s_mov_b32 s5, 4 10715; GFX10-NEXT: v_writelane_b32 v40, s6, 2 10716; GFX10-NEXT: s_mov_b32 s6, 5 10717; GFX10-NEXT: v_writelane_b32 v40, s7, 3 10718; GFX10-NEXT: s_mov_b32 s7, 6 10719; GFX10-NEXT: v_writelane_b32 v40, s30, 4 10720; GFX10-NEXT: v_writelane_b32 v40, s31, 5 10721; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10722; GFX10-NEXT: v_readlane_b32 s31, v40, 5 10723; GFX10-NEXT: v_readlane_b32 s30, v40, 4 10724; GFX10-NEXT: v_readlane_b32 s7, v40, 3 10725; GFX10-NEXT: v_readlane_b32 s6, v40, 2 10726; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10727; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10728; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10729; GFX10-NEXT: v_readlane_b32 s33, v40, 6 10730; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10731; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10732; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10733; GFX10-NEXT: s_mov_b32 exec_lo, s34 10734; GFX10-NEXT: s_waitcnt vmcnt(0) 10735; GFX10-NEXT: s_setpc_b64 s[30:31] 10736; 10737; GFX11-LABEL: test_call_external_void_func_v3i32_i32_inreg: 10738; GFX11: ; %bb.0: 10739; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10740; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10741; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10742; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10743; GFX11-NEXT: s_mov_b32 exec_lo, s0 10744; GFX11-NEXT: v_writelane_b32 v40, s33, 6 10745; 
GFX11-NEXT: s_mov_b32 s33, s32 10746; GFX11-NEXT: s_add_i32 s32, s32, 16 10747; GFX11-NEXT: s_getpc_b64 s[0:1] 10748; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 10749; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 10750; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10751; GFX11-NEXT: s_mov_b32 s4, 3 10752; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10753; GFX11-NEXT: s_mov_b32 s5, 4 10754; GFX11-NEXT: v_writelane_b32 v40, s6, 2 10755; GFX11-NEXT: s_mov_b32 s6, 5 10756; GFX11-NEXT: v_writelane_b32 v40, s7, 3 10757; GFX11-NEXT: s_mov_b32 s7, 6 10758; GFX11-NEXT: v_writelane_b32 v40, s30, 4 10759; GFX11-NEXT: v_writelane_b32 v40, s31, 5 10760; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10761; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10762; GFX11-NEXT: v_readlane_b32 s31, v40, 5 10763; GFX11-NEXT: v_readlane_b32 s30, v40, 4 10764; GFX11-NEXT: v_readlane_b32 s7, v40, 3 10765; GFX11-NEXT: v_readlane_b32 s6, v40, 2 10766; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10767; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10768; GFX11-NEXT: s_add_i32 s32, s32, -16 10769; GFX11-NEXT: v_readlane_b32 s33, v40, 6 10770; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10771; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10772; GFX11-NEXT: s_mov_b32 exec_lo, s0 10773; GFX11-NEXT: s_waitcnt vmcnt(0) 10774; GFX11-NEXT: s_setpc_b64 s[30:31] 10775; 10776; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg: 10777; GFX10-SCRATCH: ; %bb.0: 10778; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10779; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10780; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10781; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10782; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10783; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10784; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 10785; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10786; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, 16 10787; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10788; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 10789; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 10790; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10791; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 10792; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10793; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 10794; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 10795; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 10796; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 10797; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 10798; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 10799; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 10800; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10801; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 10802; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 10803; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 10804; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 10805; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10806; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10807; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10808; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 10809; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10810; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10811; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10812; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10813; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10814; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10815 call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6) 10816 ret void 10817} 10818 10819define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { 10820; GFX9-LABEL: test_call_external_void_func_v4i32_inreg: 10821; GFX9: ; %bb.0: 10822; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10823; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10824; GFX9-NEXT: buffer_store_dword 
v40, off, s[0:3], s32 ; 4-byte Folded Spill 10825; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10826; GFX9-NEXT: v_writelane_b32 v40, s33, 6 10827; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10828; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10829; GFX9-NEXT: v_writelane_b32 v40, s6, 2 10830; GFX9-NEXT: v_writelane_b32 v40, s7, 3 10831; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 10832; GFX9-NEXT: s_mov_b32 s33, s32 10833; GFX9-NEXT: s_addk_i32 s32, 0x400 10834; GFX9-NEXT: v_writelane_b32 v40, s30, 4 10835; GFX9-NEXT: v_writelane_b32 v40, s31, 5 10836; GFX9-NEXT: s_getpc_b64 s[34:35] 10837; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 10838; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 10839; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10840; GFX9-NEXT: v_readlane_b32 s31, v40, 5 10841; GFX9-NEXT: v_readlane_b32 s30, v40, 4 10842; GFX9-NEXT: v_readlane_b32 s7, v40, 3 10843; GFX9-NEXT: v_readlane_b32 s6, v40, 2 10844; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10845; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10846; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10847; GFX9-NEXT: v_readlane_b32 s33, v40, 6 10848; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10849; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10850; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10851; GFX9-NEXT: s_waitcnt vmcnt(0) 10852; GFX9-NEXT: s_setpc_b64 s[30:31] 10853; 10854; GFX10-LABEL: test_call_external_void_func_v4i32_inreg: 10855; GFX10: ; %bb.0: 10856; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10857; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 10858; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10859; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10860; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10861; GFX10-NEXT: s_mov_b32 exec_lo, s34 10862; GFX10-NEXT: v_writelane_b32 v40, s33, 6 10863; GFX10-NEXT: s_mov_b32 s33, s32 10864; GFX10-NEXT: s_addk_i32 s32, 0x200 10865; GFX10-NEXT: v_writelane_b32 v40, s4, 0 10866; GFX10-NEXT: 
v_writelane_b32 v40, s5, 1 10867; GFX10-NEXT: v_writelane_b32 v40, s6, 2 10868; GFX10-NEXT: v_writelane_b32 v40, s7, 3 10869; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 10870; GFX10-NEXT: s_getpc_b64 s[34:35] 10871; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 10872; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 10873; GFX10-NEXT: v_writelane_b32 v40, s30, 4 10874; GFX10-NEXT: v_writelane_b32 v40, s31, 5 10875; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 10876; GFX10-NEXT: v_readlane_b32 s31, v40, 5 10877; GFX10-NEXT: v_readlane_b32 s30, v40, 4 10878; GFX10-NEXT: v_readlane_b32 s7, v40, 3 10879; GFX10-NEXT: v_readlane_b32 s6, v40, 2 10880; GFX10-NEXT: v_readlane_b32 s5, v40, 1 10881; GFX10-NEXT: v_readlane_b32 s4, v40, 0 10882; GFX10-NEXT: s_addk_i32 s32, 0xfe00 10883; GFX10-NEXT: v_readlane_b32 s33, v40, 6 10884; GFX10-NEXT: s_or_saveexec_b32 s34, -1 10885; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 10886; GFX10-NEXT: s_waitcnt_depctr 0xffe3 10887; GFX10-NEXT: s_mov_b32 exec_lo, s34 10888; GFX10-NEXT: s_waitcnt vmcnt(0) 10889; GFX10-NEXT: s_setpc_b64 s[30:31] 10890; 10891; GFX11-LABEL: test_call_external_void_func_v4i32_inreg: 10892; GFX11: ; %bb.0: 10893; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10894; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 10895; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10896; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 10897; GFX11-NEXT: s_mov_b32 exec_lo, s0 10898; GFX11-NEXT: v_writelane_b32 v40, s33, 6 10899; GFX11-NEXT: s_mov_b32 s33, s32 10900; GFX11-NEXT: s_add_i32 s32, s32, 16 10901; GFX11-NEXT: v_writelane_b32 v40, s4, 0 10902; GFX11-NEXT: v_writelane_b32 v40, s5, 1 10903; GFX11-NEXT: v_writelane_b32 v40, s6, 2 10904; GFX11-NEXT: v_writelane_b32 v40, s7, 3 10905; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 10906; GFX11-NEXT: s_getpc_b64 s[0:1] 10907; GFX11-NEXT: s_add_u32 s0, s0, 
external_void_func_v4i32_inreg@rel32@lo+4 10908; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 10909; GFX11-NEXT: v_writelane_b32 v40, s30, 4 10910; GFX11-NEXT: v_writelane_b32 v40, s31, 5 10911; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 10912; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 10913; GFX11-NEXT: v_readlane_b32 s31, v40, 5 10914; GFX11-NEXT: v_readlane_b32 s30, v40, 4 10915; GFX11-NEXT: v_readlane_b32 s7, v40, 3 10916; GFX11-NEXT: v_readlane_b32 s6, v40, 2 10917; GFX11-NEXT: v_readlane_b32 s5, v40, 1 10918; GFX11-NEXT: v_readlane_b32 s4, v40, 0 10919; GFX11-NEXT: s_add_i32 s32, s32, -16 10920; GFX11-NEXT: v_readlane_b32 s33, v40, 6 10921; GFX11-NEXT: s_or_saveexec_b32 s0, -1 10922; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 10923; GFX11-NEXT: s_mov_b32 exec_lo, s0 10924; GFX11-NEXT: s_waitcnt vmcnt(0) 10925; GFX11-NEXT: s_setpc_b64 s[30:31] 10926; 10927; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg: 10928; GFX10-SCRATCH: ; %bb.0: 10929; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10930; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 10931; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10932; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 10933; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10934; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10935; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 10936; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 10937; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 10938; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 10939; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 10940; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 10941; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 10942; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 10943; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 10944; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 10945; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v4i32_inreg@rel32@hi+12 10946; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 10947; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 10948; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 10949; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 10950; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 10951; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 10952; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 10953; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 10954; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 10955; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 10956; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 10957; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 10958; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 10959; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 10960; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 10961; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 10962; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 10963 %val = load <4 x i32>, <4 x i32> addrspace(4)* undef 10964 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val) 10965 ret void 10966} 10967 10968define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { 10969; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg: 10970; GFX9: ; %bb.0: 10971; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10972; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 10973; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 10974; GFX9-NEXT: s_mov_b64 exec, s[34:35] 10975; GFX9-NEXT: v_writelane_b32 v40, s33, 6 10976; GFX9-NEXT: v_writelane_b32 v40, s4, 0 10977; GFX9-NEXT: v_writelane_b32 v40, s5, 1 10978; GFX9-NEXT: v_writelane_b32 v40, s6, 2 10979; GFX9-NEXT: v_writelane_b32 v40, s7, 3 10980; GFX9-NEXT: s_mov_b32 s33, s32 10981; GFX9-NEXT: s_addk_i32 s32, 0x400 10982; GFX9-NEXT: v_writelane_b32 v40, s30, 4 10983; GFX9-NEXT: s_mov_b32 s4, 1 10984; GFX9-NEXT: s_mov_b32 s5, 2 10985; GFX9-NEXT: s_mov_b32 s6, 3 10986; GFX9-NEXT: 
s_mov_b32 s7, 4 10987; GFX9-NEXT: v_writelane_b32 v40, s31, 5 10988; GFX9-NEXT: s_getpc_b64 s[34:35] 10989; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 10990; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 10991; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 10992; GFX9-NEXT: v_readlane_b32 s31, v40, 5 10993; GFX9-NEXT: v_readlane_b32 s30, v40, 4 10994; GFX9-NEXT: v_readlane_b32 s7, v40, 3 10995; GFX9-NEXT: v_readlane_b32 s6, v40, 2 10996; GFX9-NEXT: v_readlane_b32 s5, v40, 1 10997; GFX9-NEXT: v_readlane_b32 s4, v40, 0 10998; GFX9-NEXT: s_addk_i32 s32, 0xfc00 10999; GFX9-NEXT: v_readlane_b32 s33, v40, 6 11000; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11001; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11002; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11003; GFX9-NEXT: s_waitcnt vmcnt(0) 11004; GFX9-NEXT: s_setpc_b64 s[30:31] 11005; 11006; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg: 11007; GFX10: ; %bb.0: 11008; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11009; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 11010; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11011; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11012; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11013; GFX10-NEXT: s_mov_b32 exec_lo, s34 11014; GFX10-NEXT: v_writelane_b32 v40, s33, 6 11015; GFX10-NEXT: s_mov_b32 s33, s32 11016; GFX10-NEXT: s_addk_i32 s32, 0x200 11017; GFX10-NEXT: s_getpc_b64 s[34:35] 11018; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 11019; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 11020; GFX10-NEXT: v_writelane_b32 v40, s4, 0 11021; GFX10-NEXT: s_mov_b32 s4, 1 11022; GFX10-NEXT: v_writelane_b32 v40, s5, 1 11023; GFX10-NEXT: s_mov_b32 s5, 2 11024; GFX10-NEXT: v_writelane_b32 v40, s6, 2 11025; GFX10-NEXT: s_mov_b32 s6, 3 11026; GFX10-NEXT: v_writelane_b32 v40, s7, 3 11027; GFX10-NEXT: s_mov_b32 s7, 4 11028; GFX10-NEXT: 
v_writelane_b32 v40, s30, 4 11029; GFX10-NEXT: v_writelane_b32 v40, s31, 5 11030; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 11031; GFX10-NEXT: v_readlane_b32 s31, v40, 5 11032; GFX10-NEXT: v_readlane_b32 s30, v40, 4 11033; GFX10-NEXT: v_readlane_b32 s7, v40, 3 11034; GFX10-NEXT: v_readlane_b32 s6, v40, 2 11035; GFX10-NEXT: v_readlane_b32 s5, v40, 1 11036; GFX10-NEXT: v_readlane_b32 s4, v40, 0 11037; GFX10-NEXT: s_addk_i32 s32, 0xfe00 11038; GFX10-NEXT: v_readlane_b32 s33, v40, 6 11039; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11040; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11041; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11042; GFX10-NEXT: s_mov_b32 exec_lo, s34 11043; GFX10-NEXT: s_waitcnt vmcnt(0) 11044; GFX10-NEXT: s_setpc_b64 s[30:31] 11045; 11046; GFX11-LABEL: test_call_external_void_func_v4i32_imm_inreg: 11047; GFX11: ; %bb.0: 11048; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11049; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 11050; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11051; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 11052; GFX11-NEXT: s_mov_b32 exec_lo, s0 11053; GFX11-NEXT: v_writelane_b32 v40, s33, 6 11054; GFX11-NEXT: s_mov_b32 s33, s32 11055; GFX11-NEXT: s_add_i32 s32, s32, 16 11056; GFX11-NEXT: s_getpc_b64 s[0:1] 11057; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 11058; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 11059; GFX11-NEXT: v_writelane_b32 v40, s4, 0 11060; GFX11-NEXT: s_mov_b32 s4, 1 11061; GFX11-NEXT: v_writelane_b32 v40, s5, 1 11062; GFX11-NEXT: s_mov_b32 s5, 2 11063; GFX11-NEXT: v_writelane_b32 v40, s6, 2 11064; GFX11-NEXT: s_mov_b32 s6, 3 11065; GFX11-NEXT: v_writelane_b32 v40, s7, 3 11066; GFX11-NEXT: s_mov_b32 s7, 4 11067; GFX11-NEXT: v_writelane_b32 v40, s30, 4 11068; GFX11-NEXT: v_writelane_b32 v40, s31, 5 11069; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 11070; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 11071; GFX11-NEXT: 
v_readlane_b32 s31, v40, 5 11072; GFX11-NEXT: v_readlane_b32 s30, v40, 4 11073; GFX11-NEXT: v_readlane_b32 s7, v40, 3 11074; GFX11-NEXT: v_readlane_b32 s6, v40, 2 11075; GFX11-NEXT: v_readlane_b32 s5, v40, 1 11076; GFX11-NEXT: v_readlane_b32 s4, v40, 0 11077; GFX11-NEXT: s_add_i32 s32, s32, -16 11078; GFX11-NEXT: v_readlane_b32 s33, v40, 6 11079; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11080; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 11081; GFX11-NEXT: s_mov_b32 exec_lo, s0 11082; GFX11-NEXT: s_waitcnt vmcnt(0) 11083; GFX11-NEXT: s_setpc_b64 s[30:31] 11084; 11085; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg: 11086; GFX10-SCRATCH: ; %bb.0: 11087; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11088; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 11089; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11090; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 11091; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11092; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11093; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 11094; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 11095; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 11096; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 11097; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 11098; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 11099; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 11100; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 11101; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 11102; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 11103; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 11104; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 11105; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 11106; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 11107; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 11108; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 11109; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 11110; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 5 11111; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 11112; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 11113; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 11114; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 11115; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 11116; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 11117; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 11118; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11119; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 11120; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11121; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11122; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 11123; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 11124 call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>) 11125 ret void 11126} 11127 11128define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { 11129; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg: 11130; GFX9: ; %bb.0: 11131; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11132; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11133; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11134; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11135; GFX9-NEXT: v_writelane_b32 v40, s33, 7 11136; GFX9-NEXT: v_writelane_b32 v40, s4, 0 11137; GFX9-NEXT: v_writelane_b32 v40, s5, 1 11138; GFX9-NEXT: v_writelane_b32 v40, s6, 2 11139; GFX9-NEXT: v_writelane_b32 v40, s7, 3 11140; GFX9-NEXT: v_writelane_b32 v40, s8, 4 11141; GFX9-NEXT: s_mov_b32 s33, s32 11142; GFX9-NEXT: s_addk_i32 s32, 0x400 11143; GFX9-NEXT: v_writelane_b32 v40, s30, 5 11144; GFX9-NEXT: s_mov_b32 s4, 1 11145; GFX9-NEXT: s_mov_b32 s5, 2 11146; GFX9-NEXT: s_mov_b32 s6, 3 11147; GFX9-NEXT: s_mov_b32 s7, 4 11148; GFX9-NEXT: s_mov_b32 s8, 5 11149; GFX9-NEXT: v_writelane_b32 v40, s31, 6 11150; GFX9-NEXT: s_getpc_b64 s[34:35] 11151; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 11152; 
GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 11153; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 11154; GFX9-NEXT: v_readlane_b32 s31, v40, 6 11155; GFX9-NEXT: v_readlane_b32 s30, v40, 5 11156; GFX9-NEXT: v_readlane_b32 s8, v40, 4 11157; GFX9-NEXT: v_readlane_b32 s7, v40, 3 11158; GFX9-NEXT: v_readlane_b32 s6, v40, 2 11159; GFX9-NEXT: v_readlane_b32 s5, v40, 1 11160; GFX9-NEXT: v_readlane_b32 s4, v40, 0 11161; GFX9-NEXT: s_addk_i32 s32, 0xfc00 11162; GFX9-NEXT: v_readlane_b32 s33, v40, 7 11163; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11164; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11165; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11166; GFX9-NEXT: s_waitcnt vmcnt(0) 11167; GFX9-NEXT: s_setpc_b64 s[30:31] 11168; 11169; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg: 11170; GFX10: ; %bb.0: 11171; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11172; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 11173; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11174; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11175; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11176; GFX10-NEXT: s_mov_b32 exec_lo, s34 11177; GFX10-NEXT: v_writelane_b32 v40, s33, 7 11178; GFX10-NEXT: s_mov_b32 s33, s32 11179; GFX10-NEXT: s_addk_i32 s32, 0x200 11180; GFX10-NEXT: s_getpc_b64 s[34:35] 11181; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 11182; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 11183; GFX10-NEXT: v_writelane_b32 v40, s4, 0 11184; GFX10-NEXT: s_mov_b32 s4, 1 11185; GFX10-NEXT: v_writelane_b32 v40, s5, 1 11186; GFX10-NEXT: s_mov_b32 s5, 2 11187; GFX10-NEXT: v_writelane_b32 v40, s6, 2 11188; GFX10-NEXT: s_mov_b32 s6, 3 11189; GFX10-NEXT: v_writelane_b32 v40, s7, 3 11190; GFX10-NEXT: s_mov_b32 s7, 4 11191; GFX10-NEXT: v_writelane_b32 v40, s8, 4 11192; GFX10-NEXT: s_mov_b32 s8, 5 11193; GFX10-NEXT: v_writelane_b32 v40, s30, 5 11194; GFX10-NEXT: v_writelane_b32 
v40, s31, 6 11195; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 11196; GFX10-NEXT: v_readlane_b32 s31, v40, 6 11197; GFX10-NEXT: v_readlane_b32 s30, v40, 5 11198; GFX10-NEXT: v_readlane_b32 s8, v40, 4 11199; GFX10-NEXT: v_readlane_b32 s7, v40, 3 11200; GFX10-NEXT: v_readlane_b32 s6, v40, 2 11201; GFX10-NEXT: v_readlane_b32 s5, v40, 1 11202; GFX10-NEXT: v_readlane_b32 s4, v40, 0 11203; GFX10-NEXT: s_addk_i32 s32, 0xfe00 11204; GFX10-NEXT: v_readlane_b32 s33, v40, 7 11205; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11206; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11207; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11208; GFX10-NEXT: s_mov_b32 exec_lo, s34 11209; GFX10-NEXT: s_waitcnt vmcnt(0) 11210; GFX10-NEXT: s_setpc_b64 s[30:31] 11211; 11212; GFX11-LABEL: test_call_external_void_func_v5i32_imm_inreg: 11213; GFX11: ; %bb.0: 11214; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11215; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 11216; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11217; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 11218; GFX11-NEXT: s_mov_b32 exec_lo, s0 11219; GFX11-NEXT: v_writelane_b32 v40, s33, 7 11220; GFX11-NEXT: s_mov_b32 s33, s32 11221; GFX11-NEXT: s_add_i32 s32, s32, 16 11222; GFX11-NEXT: s_getpc_b64 s[0:1] 11223; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 11224; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 11225; GFX11-NEXT: v_writelane_b32 v40, s4, 0 11226; GFX11-NEXT: s_mov_b32 s4, 1 11227; GFX11-NEXT: v_writelane_b32 v40, s5, 1 11228; GFX11-NEXT: s_mov_b32 s5, 2 11229; GFX11-NEXT: v_writelane_b32 v40, s6, 2 11230; GFX11-NEXT: s_mov_b32 s6, 3 11231; GFX11-NEXT: v_writelane_b32 v40, s7, 3 11232; GFX11-NEXT: s_mov_b32 s7, 4 11233; GFX11-NEXT: v_writelane_b32 v40, s8, 4 11234; GFX11-NEXT: s_mov_b32 s8, 5 11235; GFX11-NEXT: v_writelane_b32 v40, s30, 5 11236; GFX11-NEXT: v_writelane_b32 v40, s31, 6 11237; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 11238; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 11239; GFX11-NEXT: v_readlane_b32 s31, v40, 6 11240; GFX11-NEXT: v_readlane_b32 s30, v40, 5 11241; GFX11-NEXT: v_readlane_b32 s8, v40, 4 11242; GFX11-NEXT: v_readlane_b32 s7, v40, 3 11243; GFX11-NEXT: v_readlane_b32 s6, v40, 2 11244; GFX11-NEXT: v_readlane_b32 s5, v40, 1 11245; GFX11-NEXT: v_readlane_b32 s4, v40, 0 11246; GFX11-NEXT: s_add_i32 s32, s32, -16 11247; GFX11-NEXT: v_readlane_b32 s33, v40, 7 11248; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11249; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 11250; GFX11-NEXT: s_mov_b32 exec_lo, s0 11251; GFX11-NEXT: s_waitcnt vmcnt(0) 11252; GFX11-NEXT: s_setpc_b64 s[30:31] 11253; 11254; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg: 11255; GFX10-SCRATCH: ; %bb.0: 11256; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11257; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 11258; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11259; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 11260; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11261; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11262; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 11263; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 11264; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 11265; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 11266; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 11267; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 11268; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 11269; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 11270; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 11271; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 11272; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 11273; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 11274; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 11275; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 11276; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 11277; GFX10-SCRATCH-NEXT: 
s_mov_b32 s8, 5 11278; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 11279; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 11280; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 11281; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 11282; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 11283; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 11284; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 11285; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 11286; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 11287; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 11288; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 11289; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 11290; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11291; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 11292; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11293; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11294; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 11295; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 11296 call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>) 11297 ret void 11298} 11299 11300define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { 11301; GFX9-LABEL: test_call_external_void_func_v8i32_inreg: 11302; GFX9: ; %bb.0: 11303; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11304; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11305; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11306; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11307; GFX9-NEXT: v_writelane_b32 v40, s33, 10 11308; GFX9-NEXT: v_writelane_b32 v40, s4, 0 11309; GFX9-NEXT: v_writelane_b32 v40, s5, 1 11310; GFX9-NEXT: v_writelane_b32 v40, s6, 2 11311; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 11312; GFX9-NEXT: v_writelane_b32 v40, s7, 3 11313; GFX9-NEXT: v_writelane_b32 v40, s8, 4 11314; GFX9-NEXT: v_writelane_b32 v40, s9, 5 11315; GFX9-NEXT: v_writelane_b32 v40, s10, 6 11316; GFX9-NEXT: v_writelane_b32 v40, s11, 7 11317; 
GFX9-NEXT: s_waitcnt lgkmcnt(0) 11318; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 11319; GFX9-NEXT: s_mov_b32 s33, s32 11320; GFX9-NEXT: s_addk_i32 s32, 0x400 11321; GFX9-NEXT: v_writelane_b32 v40, s30, 8 11322; GFX9-NEXT: v_writelane_b32 v40, s31, 9 11323; GFX9-NEXT: s_getpc_b64 s[34:35] 11324; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 11325; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 11326; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 11327; GFX9-NEXT: v_readlane_b32 s31, v40, 9 11328; GFX9-NEXT: v_readlane_b32 s30, v40, 8 11329; GFX9-NEXT: v_readlane_b32 s11, v40, 7 11330; GFX9-NEXT: v_readlane_b32 s10, v40, 6 11331; GFX9-NEXT: v_readlane_b32 s9, v40, 5 11332; GFX9-NEXT: v_readlane_b32 s8, v40, 4 11333; GFX9-NEXT: v_readlane_b32 s7, v40, 3 11334; GFX9-NEXT: v_readlane_b32 s6, v40, 2 11335; GFX9-NEXT: v_readlane_b32 s5, v40, 1 11336; GFX9-NEXT: v_readlane_b32 s4, v40, 0 11337; GFX9-NEXT: s_addk_i32 s32, 0xfc00 11338; GFX9-NEXT: v_readlane_b32 s33, v40, 10 11339; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11340; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11341; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11342; GFX9-NEXT: s_waitcnt vmcnt(0) 11343; GFX9-NEXT: s_setpc_b64 s[30:31] 11344; 11345; GFX10-LABEL: test_call_external_void_func_v8i32_inreg: 11346; GFX10: ; %bb.0: 11347; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11348; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 11349; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11350; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11351; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11352; GFX10-NEXT: s_mov_b32 exec_lo, s34 11353; GFX10-NEXT: v_writelane_b32 v40, s33, 10 11354; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 11355; GFX10-NEXT: s_mov_b32 s33, s32 11356; GFX10-NEXT: s_addk_i32 s32, 0x200 11357; GFX10-NEXT: v_writelane_b32 v40, s4, 0 11358; GFX10-NEXT: v_writelane_b32 v40, s5, 1 11359; GFX10-NEXT: 
v_writelane_b32 v40, s6, 2 11360; GFX10-NEXT: v_writelane_b32 v40, s7, 3 11361; GFX10-NEXT: v_writelane_b32 v40, s8, 4 11362; GFX10-NEXT: v_writelane_b32 v40, s9, 5 11363; GFX10-NEXT: v_writelane_b32 v40, s10, 6 11364; GFX10-NEXT: v_writelane_b32 v40, s11, 7 11365; GFX10-NEXT: s_waitcnt lgkmcnt(0) 11366; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 11367; GFX10-NEXT: s_getpc_b64 s[34:35] 11368; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 11369; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 11370; GFX10-NEXT: v_writelane_b32 v40, s30, 8 11371; GFX10-NEXT: v_writelane_b32 v40, s31, 9 11372; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 11373; GFX10-NEXT: v_readlane_b32 s31, v40, 9 11374; GFX10-NEXT: v_readlane_b32 s30, v40, 8 11375; GFX10-NEXT: v_readlane_b32 s11, v40, 7 11376; GFX10-NEXT: v_readlane_b32 s10, v40, 6 11377; GFX10-NEXT: v_readlane_b32 s9, v40, 5 11378; GFX10-NEXT: v_readlane_b32 s8, v40, 4 11379; GFX10-NEXT: v_readlane_b32 s7, v40, 3 11380; GFX10-NEXT: v_readlane_b32 s6, v40, 2 11381; GFX10-NEXT: v_readlane_b32 s5, v40, 1 11382; GFX10-NEXT: v_readlane_b32 s4, v40, 0 11383; GFX10-NEXT: s_addk_i32 s32, 0xfe00 11384; GFX10-NEXT: v_readlane_b32 s33, v40, 10 11385; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11386; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11387; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11388; GFX10-NEXT: s_mov_b32 exec_lo, s34 11389; GFX10-NEXT: s_waitcnt vmcnt(0) 11390; GFX10-NEXT: s_setpc_b64 s[30:31] 11391; 11392; GFX11-LABEL: test_call_external_void_func_v8i32_inreg: 11393; GFX11: ; %bb.0: 11394; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11395; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 11396; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11397; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 11398; GFX11-NEXT: s_mov_b32 exec_lo, s0 11399; GFX11-NEXT: v_writelane_b32 v40, s33, 10 11400; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 11401; 
GFX11-NEXT: s_mov_b32 s33, s32 11402; GFX11-NEXT: s_add_i32 s32, s32, 16 11403; GFX11-NEXT: v_writelane_b32 v40, s4, 0 11404; GFX11-NEXT: v_writelane_b32 v40, s5, 1 11405; GFX11-NEXT: v_writelane_b32 v40, s6, 2 11406; GFX11-NEXT: v_writelane_b32 v40, s7, 3 11407; GFX11-NEXT: v_writelane_b32 v40, s8, 4 11408; GFX11-NEXT: v_writelane_b32 v40, s9, 5 11409; GFX11-NEXT: v_writelane_b32 v40, s10, 6 11410; GFX11-NEXT: v_writelane_b32 v40, s11, 7 11411; GFX11-NEXT: s_waitcnt lgkmcnt(0) 11412; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0 11413; GFX11-NEXT: s_getpc_b64 s[0:1] 11414; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 11415; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 11416; GFX11-NEXT: v_writelane_b32 v40, s30, 8 11417; GFX11-NEXT: v_writelane_b32 v40, s31, 9 11418; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 11419; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 11420; GFX11-NEXT: v_readlane_b32 s31, v40, 9 11421; GFX11-NEXT: v_readlane_b32 s30, v40, 8 11422; GFX11-NEXT: v_readlane_b32 s11, v40, 7 11423; GFX11-NEXT: v_readlane_b32 s10, v40, 6 11424; GFX11-NEXT: v_readlane_b32 s9, v40, 5 11425; GFX11-NEXT: v_readlane_b32 s8, v40, 4 11426; GFX11-NEXT: v_readlane_b32 s7, v40, 3 11427; GFX11-NEXT: v_readlane_b32 s6, v40, 2 11428; GFX11-NEXT: v_readlane_b32 s5, v40, 1 11429; GFX11-NEXT: v_readlane_b32 s4, v40, 0 11430; GFX11-NEXT: s_add_i32 s32, s32, -16 11431; GFX11-NEXT: v_readlane_b32 s33, v40, 10 11432; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11433; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 11434; GFX11-NEXT: s_mov_b32 exec_lo, s0 11435; GFX11-NEXT: s_waitcnt vmcnt(0) 11436; GFX11-NEXT: s_setpc_b64 s[30:31] 11437; 11438; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg: 11439; GFX10-SCRATCH: ; %bb.0: 11440; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11441; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 11442; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11443; 
GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 11444; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11445; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11446; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 11447; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 11448; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 11449; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 11450; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 11451; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 11452; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 11453; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 11454; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 11455; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 11456; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 11457; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 11458; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 11459; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 11460; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 11461; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 11462; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 11463; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 11464; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 11465; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 11466; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 11467; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 11468; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 11469; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 11470; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 11471; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 11472; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 11473; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 11474; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 11475; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 11476; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 11477; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 11478; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s0, -1 11479; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 11480; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11481; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11482; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 11483; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 11484 %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef 11485 %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr 11486 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val) 11487 ret void 11488} 11489 11490define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { 11491; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg: 11492; GFX9: ; %bb.0: 11493; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11494; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11495; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11496; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11497; GFX9-NEXT: v_writelane_b32 v40, s33, 10 11498; GFX9-NEXT: v_writelane_b32 v40, s4, 0 11499; GFX9-NEXT: v_writelane_b32 v40, s5, 1 11500; GFX9-NEXT: v_writelane_b32 v40, s6, 2 11501; GFX9-NEXT: v_writelane_b32 v40, s7, 3 11502; GFX9-NEXT: v_writelane_b32 v40, s8, 4 11503; GFX9-NEXT: v_writelane_b32 v40, s9, 5 11504; GFX9-NEXT: v_writelane_b32 v40, s10, 6 11505; GFX9-NEXT: v_writelane_b32 v40, s11, 7 11506; GFX9-NEXT: s_mov_b32 s33, s32 11507; GFX9-NEXT: s_addk_i32 s32, 0x400 11508; GFX9-NEXT: v_writelane_b32 v40, s30, 8 11509; GFX9-NEXT: s_mov_b32 s4, 1 11510; GFX9-NEXT: s_mov_b32 s5, 2 11511; GFX9-NEXT: s_mov_b32 s6, 3 11512; GFX9-NEXT: s_mov_b32 s7, 4 11513; GFX9-NEXT: s_mov_b32 s8, 5 11514; GFX9-NEXT: s_mov_b32 s9, 6 11515; GFX9-NEXT: s_mov_b32 s10, 7 11516; GFX9-NEXT: s_mov_b32 s11, 8 11517; GFX9-NEXT: v_writelane_b32 v40, s31, 9 11518; GFX9-NEXT: s_getpc_b64 s[34:35] 11519; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 11520; GFX9-NEXT: s_addc_u32 s35, s35, 
external_void_func_v8i32_inreg@rel32@hi+12 11521; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 11522; GFX9-NEXT: v_readlane_b32 s31, v40, 9 11523; GFX9-NEXT: v_readlane_b32 s30, v40, 8 11524; GFX9-NEXT: v_readlane_b32 s11, v40, 7 11525; GFX9-NEXT: v_readlane_b32 s10, v40, 6 11526; GFX9-NEXT: v_readlane_b32 s9, v40, 5 11527; GFX9-NEXT: v_readlane_b32 s8, v40, 4 11528; GFX9-NEXT: v_readlane_b32 s7, v40, 3 11529; GFX9-NEXT: v_readlane_b32 s6, v40, 2 11530; GFX9-NEXT: v_readlane_b32 s5, v40, 1 11531; GFX9-NEXT: v_readlane_b32 s4, v40, 0 11532; GFX9-NEXT: s_addk_i32 s32, 0xfc00 11533; GFX9-NEXT: v_readlane_b32 s33, v40, 10 11534; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11535; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11536; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11537; GFX9-NEXT: s_waitcnt vmcnt(0) 11538; GFX9-NEXT: s_setpc_b64 s[30:31] 11539; 11540; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg: 11541; GFX10: ; %bb.0: 11542; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11543; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 11544; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11545; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11546; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11547; GFX10-NEXT: s_mov_b32 exec_lo, s34 11548; GFX10-NEXT: v_writelane_b32 v40, s33, 10 11549; GFX10-NEXT: s_mov_b32 s33, s32 11550; GFX10-NEXT: s_addk_i32 s32, 0x200 11551; GFX10-NEXT: s_getpc_b64 s[34:35] 11552; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 11553; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 11554; GFX10-NEXT: v_writelane_b32 v40, s4, 0 11555; GFX10-NEXT: s_mov_b32 s4, 1 11556; GFX10-NEXT: v_writelane_b32 v40, s5, 1 11557; GFX10-NEXT: s_mov_b32 s5, 2 11558; GFX10-NEXT: v_writelane_b32 v40, s6, 2 11559; GFX10-NEXT: s_mov_b32 s6, 3 11560; GFX10-NEXT: v_writelane_b32 v40, s7, 3 11561; GFX10-NEXT: s_mov_b32 s7, 4 11562; GFX10-NEXT: v_writelane_b32 v40, s8, 4 11563; 
GFX10-NEXT: s_mov_b32 s8, 5 11564; GFX10-NEXT: v_writelane_b32 v40, s9, 5 11565; GFX10-NEXT: s_mov_b32 s9, 6 11566; GFX10-NEXT: v_writelane_b32 v40, s10, 6 11567; GFX10-NEXT: s_mov_b32 s10, 7 11568; GFX10-NEXT: v_writelane_b32 v40, s11, 7 11569; GFX10-NEXT: s_mov_b32 s11, 8 11570; GFX10-NEXT: v_writelane_b32 v40, s30, 8 11571; GFX10-NEXT: v_writelane_b32 v40, s31, 9 11572; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 11573; GFX10-NEXT: v_readlane_b32 s31, v40, 9 11574; GFX10-NEXT: v_readlane_b32 s30, v40, 8 11575; GFX10-NEXT: v_readlane_b32 s11, v40, 7 11576; GFX10-NEXT: v_readlane_b32 s10, v40, 6 11577; GFX10-NEXT: v_readlane_b32 s9, v40, 5 11578; GFX10-NEXT: v_readlane_b32 s8, v40, 4 11579; GFX10-NEXT: v_readlane_b32 s7, v40, 3 11580; GFX10-NEXT: v_readlane_b32 s6, v40, 2 11581; GFX10-NEXT: v_readlane_b32 s5, v40, 1 11582; GFX10-NEXT: v_readlane_b32 s4, v40, 0 11583; GFX10-NEXT: s_addk_i32 s32, 0xfe00 11584; GFX10-NEXT: v_readlane_b32 s33, v40, 10 11585; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11586; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11587; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11588; GFX10-NEXT: s_mov_b32 exec_lo, s34 11589; GFX10-NEXT: s_waitcnt vmcnt(0) 11590; GFX10-NEXT: s_setpc_b64 s[30:31] 11591; 11592; GFX11-LABEL: test_call_external_void_func_v8i32_imm_inreg: 11593; GFX11: ; %bb.0: 11594; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11595; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 11596; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11597; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 11598; GFX11-NEXT: s_mov_b32 exec_lo, s0 11599; GFX11-NEXT: v_writelane_b32 v40, s33, 10 11600; GFX11-NEXT: s_mov_b32 s33, s32 11601; GFX11-NEXT: s_add_i32 s32, s32, 16 11602; GFX11-NEXT: s_getpc_b64 s[0:1] 11603; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 11604; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 11605; GFX11-NEXT: v_writelane_b32 v40, s4, 0 11606; 
GFX11-NEXT: s_mov_b32 s4, 1 11607; GFX11-NEXT: v_writelane_b32 v40, s5, 1 11608; GFX11-NEXT: s_mov_b32 s5, 2 11609; GFX11-NEXT: v_writelane_b32 v40, s6, 2 11610; GFX11-NEXT: s_mov_b32 s6, 3 11611; GFX11-NEXT: v_writelane_b32 v40, s7, 3 11612; GFX11-NEXT: s_mov_b32 s7, 4 11613; GFX11-NEXT: v_writelane_b32 v40, s8, 4 11614; GFX11-NEXT: s_mov_b32 s8, 5 11615; GFX11-NEXT: v_writelane_b32 v40, s9, 5 11616; GFX11-NEXT: s_mov_b32 s9, 6 11617; GFX11-NEXT: v_writelane_b32 v40, s10, 6 11618; GFX11-NEXT: s_mov_b32 s10, 7 11619; GFX11-NEXT: v_writelane_b32 v40, s11, 7 11620; GFX11-NEXT: s_mov_b32 s11, 8 11621; GFX11-NEXT: v_writelane_b32 v40, s30, 8 11622; GFX11-NEXT: v_writelane_b32 v40, s31, 9 11623; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 11624; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 11625; GFX11-NEXT: v_readlane_b32 s31, v40, 9 11626; GFX11-NEXT: v_readlane_b32 s30, v40, 8 11627; GFX11-NEXT: v_readlane_b32 s11, v40, 7 11628; GFX11-NEXT: v_readlane_b32 s10, v40, 6 11629; GFX11-NEXT: v_readlane_b32 s9, v40, 5 11630; GFX11-NEXT: v_readlane_b32 s8, v40, 4 11631; GFX11-NEXT: v_readlane_b32 s7, v40, 3 11632; GFX11-NEXT: v_readlane_b32 s6, v40, 2 11633; GFX11-NEXT: v_readlane_b32 s5, v40, 1 11634; GFX11-NEXT: v_readlane_b32 s4, v40, 0 11635; GFX11-NEXT: s_add_i32 s32, s32, -16 11636; GFX11-NEXT: v_readlane_b32 s33, v40, 10 11637; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11638; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 11639; GFX11-NEXT: s_mov_b32 exec_lo, s0 11640; GFX11-NEXT: s_waitcnt vmcnt(0) 11641; GFX11-NEXT: s_setpc_b64 s[30:31] 11642; 11643; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg: 11644; GFX10-SCRATCH: ; %bb.0: 11645; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11646; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 11647; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11648; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 11649; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11650; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11651; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 11652; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 11653; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 11654; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 11655; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 11656; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 11657; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 11658; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 11659; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 11660; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 11661; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 11662; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 11663; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 11664; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 11665; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 11666; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 11667; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 11668; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6 11669; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 11670; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 11671; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 11672; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 11673; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 11674; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 11675; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 11676; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 11677; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 11678; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 11679; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 11680; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 11681; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 11682; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 11683; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 11684; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 11685; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 11686; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 11687; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 
10 11688; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11689; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 11690; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11691; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11692; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 11693; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 11694 call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>) 11695 ret void 11696} 11697 11698define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { 11699; GFX9-LABEL: test_call_external_void_func_v16i32_inreg: 11700; GFX9: ; %bb.0: 11701; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11702; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11703; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11704; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11705; GFX9-NEXT: v_writelane_b32 v40, s33, 18 11706; GFX9-NEXT: v_writelane_b32 v40, s4, 0 11707; GFX9-NEXT: v_writelane_b32 v40, s5, 1 11708; GFX9-NEXT: v_writelane_b32 v40, s6, 2 11709; GFX9-NEXT: v_writelane_b32 v40, s7, 3 11710; GFX9-NEXT: v_writelane_b32 v40, s8, 4 11711; GFX9-NEXT: v_writelane_b32 v40, s9, 5 11712; GFX9-NEXT: v_writelane_b32 v40, s10, 6 11713; GFX9-NEXT: v_writelane_b32 v40, s11, 7 11714; GFX9-NEXT: v_writelane_b32 v40, s12, 8 11715; GFX9-NEXT: v_writelane_b32 v40, s13, 9 11716; GFX9-NEXT: v_writelane_b32 v40, s14, 10 11717; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 11718; GFX9-NEXT: v_writelane_b32 v40, s15, 11 11719; GFX9-NEXT: v_writelane_b32 v40, s16, 12 11720; GFX9-NEXT: v_writelane_b32 v40, s17, 13 11721; GFX9-NEXT: v_writelane_b32 v40, s18, 14 11722; GFX9-NEXT: v_writelane_b32 v40, s19, 15 11723; GFX9-NEXT: s_waitcnt lgkmcnt(0) 11724; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 11725; GFX9-NEXT: s_mov_b32 s33, s32 11726; GFX9-NEXT: s_addk_i32 s32, 0x400 11727; GFX9-NEXT: v_writelane_b32 v40, s30, 16 11728; GFX9-NEXT: v_writelane_b32 v40, s31, 17 11729; 
GFX9-NEXT: s_getpc_b64 s[34:35] 11730; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 11731; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 11732; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 11733; GFX9-NEXT: v_readlane_b32 s31, v40, 17 11734; GFX9-NEXT: v_readlane_b32 s30, v40, 16 11735; GFX9-NEXT: v_readlane_b32 s19, v40, 15 11736; GFX9-NEXT: v_readlane_b32 s18, v40, 14 11737; GFX9-NEXT: v_readlane_b32 s17, v40, 13 11738; GFX9-NEXT: v_readlane_b32 s16, v40, 12 11739; GFX9-NEXT: v_readlane_b32 s15, v40, 11 11740; GFX9-NEXT: v_readlane_b32 s14, v40, 10 11741; GFX9-NEXT: v_readlane_b32 s13, v40, 9 11742; GFX9-NEXT: v_readlane_b32 s12, v40, 8 11743; GFX9-NEXT: v_readlane_b32 s11, v40, 7 11744; GFX9-NEXT: v_readlane_b32 s10, v40, 6 11745; GFX9-NEXT: v_readlane_b32 s9, v40, 5 11746; GFX9-NEXT: v_readlane_b32 s8, v40, 4 11747; GFX9-NEXT: v_readlane_b32 s7, v40, 3 11748; GFX9-NEXT: v_readlane_b32 s6, v40, 2 11749; GFX9-NEXT: v_readlane_b32 s5, v40, 1 11750; GFX9-NEXT: v_readlane_b32 s4, v40, 0 11751; GFX9-NEXT: s_addk_i32 s32, 0xfc00 11752; GFX9-NEXT: v_readlane_b32 s33, v40, 18 11753; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11754; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11755; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11756; GFX9-NEXT: s_waitcnt vmcnt(0) 11757; GFX9-NEXT: s_setpc_b64 s[30:31] 11758; 11759; GFX10-LABEL: test_call_external_void_func_v16i32_inreg: 11760; GFX10: ; %bb.0: 11761; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11762; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 11763; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11764; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11765; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11766; GFX10-NEXT: s_mov_b32 exec_lo, s34 11767; GFX10-NEXT: v_writelane_b32 v40, s33, 18 11768; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 11769; GFX10-NEXT: s_mov_b32 s33, s32 11770; GFX10-NEXT: s_addk_i32 s32, 0x200 11771; 
GFX10-NEXT: v_writelane_b32 v40, s4, 0 11772; GFX10-NEXT: v_writelane_b32 v40, s5, 1 11773; GFX10-NEXT: v_writelane_b32 v40, s6, 2 11774; GFX10-NEXT: v_writelane_b32 v40, s7, 3 11775; GFX10-NEXT: v_writelane_b32 v40, s8, 4 11776; GFX10-NEXT: v_writelane_b32 v40, s9, 5 11777; GFX10-NEXT: v_writelane_b32 v40, s10, 6 11778; GFX10-NEXT: v_writelane_b32 v40, s11, 7 11779; GFX10-NEXT: v_writelane_b32 v40, s12, 8 11780; GFX10-NEXT: v_writelane_b32 v40, s13, 9 11781; GFX10-NEXT: v_writelane_b32 v40, s14, 10 11782; GFX10-NEXT: v_writelane_b32 v40, s15, 11 11783; GFX10-NEXT: v_writelane_b32 v40, s16, 12 11784; GFX10-NEXT: v_writelane_b32 v40, s17, 13 11785; GFX10-NEXT: v_writelane_b32 v40, s18, 14 11786; GFX10-NEXT: v_writelane_b32 v40, s19, 15 11787; GFX10-NEXT: s_waitcnt lgkmcnt(0) 11788; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 11789; GFX10-NEXT: s_getpc_b64 s[34:35] 11790; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 11791; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 11792; GFX10-NEXT: v_writelane_b32 v40, s30, 16 11793; GFX10-NEXT: v_writelane_b32 v40, s31, 17 11794; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 11795; GFX10-NEXT: v_readlane_b32 s31, v40, 17 11796; GFX10-NEXT: v_readlane_b32 s30, v40, 16 11797; GFX10-NEXT: v_readlane_b32 s19, v40, 15 11798; GFX10-NEXT: v_readlane_b32 s18, v40, 14 11799; GFX10-NEXT: v_readlane_b32 s17, v40, 13 11800; GFX10-NEXT: v_readlane_b32 s16, v40, 12 11801; GFX10-NEXT: v_readlane_b32 s15, v40, 11 11802; GFX10-NEXT: v_readlane_b32 s14, v40, 10 11803; GFX10-NEXT: v_readlane_b32 s13, v40, 9 11804; GFX10-NEXT: v_readlane_b32 s12, v40, 8 11805; GFX10-NEXT: v_readlane_b32 s11, v40, 7 11806; GFX10-NEXT: v_readlane_b32 s10, v40, 6 11807; GFX10-NEXT: v_readlane_b32 s9, v40, 5 11808; GFX10-NEXT: v_readlane_b32 s8, v40, 4 11809; GFX10-NEXT: v_readlane_b32 s7, v40, 3 11810; GFX10-NEXT: v_readlane_b32 s6, v40, 2 11811; GFX10-NEXT: v_readlane_b32 s5, v40, 1 11812; GFX10-NEXT: 
v_readlane_b32 s4, v40, 0 11813; GFX10-NEXT: s_addk_i32 s32, 0xfe00 11814; GFX10-NEXT: v_readlane_b32 s33, v40, 18 11815; GFX10-NEXT: s_or_saveexec_b32 s34, -1 11816; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 11817; GFX10-NEXT: s_waitcnt_depctr 0xffe3 11818; GFX10-NEXT: s_mov_b32 exec_lo, s34 11819; GFX10-NEXT: s_waitcnt vmcnt(0) 11820; GFX10-NEXT: s_setpc_b64 s[30:31] 11821; 11822; GFX11-LABEL: test_call_external_void_func_v16i32_inreg: 11823; GFX11: ; %bb.0: 11824; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11825; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 11826; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11827; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 11828; GFX11-NEXT: s_mov_b32 exec_lo, s0 11829; GFX11-NEXT: v_writelane_b32 v40, s33, 18 11830; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 11831; GFX11-NEXT: s_mov_b32 s33, s32 11832; GFX11-NEXT: s_add_i32 s32, s32, 16 11833; GFX11-NEXT: v_writelane_b32 v40, s4, 0 11834; GFX11-NEXT: v_writelane_b32 v40, s5, 1 11835; GFX11-NEXT: v_writelane_b32 v40, s6, 2 11836; GFX11-NEXT: v_writelane_b32 v40, s7, 3 11837; GFX11-NEXT: v_writelane_b32 v40, s8, 4 11838; GFX11-NEXT: v_writelane_b32 v40, s9, 5 11839; GFX11-NEXT: v_writelane_b32 v40, s10, 6 11840; GFX11-NEXT: v_writelane_b32 v40, s11, 7 11841; GFX11-NEXT: v_writelane_b32 v40, s12, 8 11842; GFX11-NEXT: v_writelane_b32 v40, s13, 9 11843; GFX11-NEXT: v_writelane_b32 v40, s14, 10 11844; GFX11-NEXT: v_writelane_b32 v40, s15, 11 11845; GFX11-NEXT: v_writelane_b32 v40, s16, 12 11846; GFX11-NEXT: v_writelane_b32 v40, s17, 13 11847; GFX11-NEXT: v_writelane_b32 v40, s18, 14 11848; GFX11-NEXT: v_writelane_b32 v40, s19, 15 11849; GFX11-NEXT: s_waitcnt lgkmcnt(0) 11850; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 11851; GFX11-NEXT: s_getpc_b64 s[0:1] 11852; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4 11853; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12 11854; 
GFX11-NEXT: v_writelane_b32 v40, s30, 16 11855; GFX11-NEXT: v_writelane_b32 v40, s31, 17 11856; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 11857; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 11858; GFX11-NEXT: v_readlane_b32 s31, v40, 17 11859; GFX11-NEXT: v_readlane_b32 s30, v40, 16 11860; GFX11-NEXT: v_readlane_b32 s19, v40, 15 11861; GFX11-NEXT: v_readlane_b32 s18, v40, 14 11862; GFX11-NEXT: v_readlane_b32 s17, v40, 13 11863; GFX11-NEXT: v_readlane_b32 s16, v40, 12 11864; GFX11-NEXT: v_readlane_b32 s15, v40, 11 11865; GFX11-NEXT: v_readlane_b32 s14, v40, 10 11866; GFX11-NEXT: v_readlane_b32 s13, v40, 9 11867; GFX11-NEXT: v_readlane_b32 s12, v40, 8 11868; GFX11-NEXT: v_readlane_b32 s11, v40, 7 11869; GFX11-NEXT: v_readlane_b32 s10, v40, 6 11870; GFX11-NEXT: v_readlane_b32 s9, v40, 5 11871; GFX11-NEXT: v_readlane_b32 s8, v40, 4 11872; GFX11-NEXT: v_readlane_b32 s7, v40, 3 11873; GFX11-NEXT: v_readlane_b32 s6, v40, 2 11874; GFX11-NEXT: v_readlane_b32 s5, v40, 1 11875; GFX11-NEXT: v_readlane_b32 s4, v40, 0 11876; GFX11-NEXT: s_add_i32 s32, s32, -16 11877; GFX11-NEXT: v_readlane_b32 s33, v40, 18 11878; GFX11-NEXT: s_or_saveexec_b32 s0, -1 11879; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 11880; GFX11-NEXT: s_mov_b32 exec_lo, s0 11881; GFX11-NEXT: s_waitcnt vmcnt(0) 11882; GFX11-NEXT: s_setpc_b64 s[30:31] 11883; 11884; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg: 11885; GFX10-SCRATCH: ; %bb.0: 11886; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11887; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 11888; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11889; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 11890; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11891; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11892; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 11893; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 11894; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 11895; GFX10-SCRATCH-NEXT: 
s_add_i32 s32, s32, 16 11896; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 11897; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 11898; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 11899; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 11900; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 11901; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 11902; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 11903; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 11904; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 11905; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 11906; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 11907; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 11908; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 11909; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 11910; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 11911; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 11912; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 11913; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 11914; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 11915; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4 11916; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12 11917; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 11918; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 11919; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 11920; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 11921; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 11922; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 11923; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 11924; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 11925; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 11926; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 11927; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 11928; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 11929; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 11930; GFX10-SCRATCH-NEXT: v_readlane_b32 
s11, v40, 7 11931; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 11932; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 11933; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 11934; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 11935; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 11936; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 11937; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 11938; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 11939; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 11940; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 11941; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 11942; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 11943; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 11944; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 11945; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 11946 %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef 11947 %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr 11948 call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val) 11949 ret void 11950} 11951 11952define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { 11953; GFX9-LABEL: test_call_external_void_func_v32i32_inreg: 11954; GFX9: ; %bb.0: 11955; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11956; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 11957; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 11958; GFX9-NEXT: s_mov_b64 exec, s[34:35] 11959; GFX9-NEXT: v_writelane_b32 v40, s33, 28 11960; GFX9-NEXT: v_writelane_b32 v40, s4, 0 11961; GFX9-NEXT: v_writelane_b32 v40, s5, 1 11962; GFX9-NEXT: v_writelane_b32 v40, s6, 2 11963; GFX9-NEXT: v_writelane_b32 v40, s7, 3 11964; GFX9-NEXT: v_writelane_b32 v40, s8, 4 11965; GFX9-NEXT: v_writelane_b32 v40, s9, 5 11966; GFX9-NEXT: v_writelane_b32 v40, s10, 6 11967; GFX9-NEXT: v_writelane_b32 v40, s11, 7 11968; GFX9-NEXT: v_writelane_b32 v40, s12, 8 11969; GFX9-NEXT: v_writelane_b32 v40, s13, 9 11970; GFX9-NEXT: 
v_writelane_b32 v40, s14, 10 11971; GFX9-NEXT: v_writelane_b32 v40, s15, 11 11972; GFX9-NEXT: v_writelane_b32 v40, s16, 12 11973; GFX9-NEXT: v_writelane_b32 v40, s17, 13 11974; GFX9-NEXT: v_writelane_b32 v40, s18, 14 11975; GFX9-NEXT: v_writelane_b32 v40, s19, 15 11976; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 11977; GFX9-NEXT: v_writelane_b32 v40, s20, 16 11978; GFX9-NEXT: v_writelane_b32 v40, s21, 17 11979; GFX9-NEXT: v_writelane_b32 v40, s22, 18 11980; GFX9-NEXT: v_writelane_b32 v40, s23, 19 11981; GFX9-NEXT: v_writelane_b32 v40, s24, 20 11982; GFX9-NEXT: s_waitcnt lgkmcnt(0) 11983; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 11984; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 11985; GFX9-NEXT: v_writelane_b32 v40, s25, 21 11986; GFX9-NEXT: v_writelane_b32 v40, s26, 22 11987; GFX9-NEXT: v_writelane_b32 v40, s27, 23 11988; GFX9-NEXT: s_mov_b32 s33, s32 11989; GFX9-NEXT: s_addk_i32 s32, 0x400 11990; GFX9-NEXT: v_writelane_b32 v40, s28, 24 11991; GFX9-NEXT: s_waitcnt lgkmcnt(0) 11992; GFX9-NEXT: v_mov_b32_e32 v0, s46 11993; GFX9-NEXT: v_writelane_b32 v40, s29, 25 11994; GFX9-NEXT: v_mov_b32_e32 v1, s47 11995; GFX9-NEXT: v_mov_b32_e32 v2, s48 11996; GFX9-NEXT: v_mov_b32_e32 v3, s49 11997; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 11998; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 11999; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 12000; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 12001; GFX9-NEXT: v_mov_b32_e32 v0, s50 12002; GFX9-NEXT: v_writelane_b32 v40, s30, 26 12003; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 12004; GFX9-NEXT: v_mov_b32_e32 v0, s51 12005; GFX9-NEXT: s_mov_b32 s20, s36 12006; GFX9-NEXT: s_mov_b32 s21, s37 12007; GFX9-NEXT: s_mov_b32 s22, s38 12008; GFX9-NEXT: s_mov_b32 s23, s39 12009; GFX9-NEXT: s_mov_b32 s24, s40 12010; GFX9-NEXT: s_mov_b32 s25, s41 12011; GFX9-NEXT: s_mov_b32 s26, s42 12012; GFX9-NEXT: s_mov_b32 s27, s43 12013; GFX9-NEXT: 
s_mov_b32 s28, s44 12014; GFX9-NEXT: s_mov_b32 s29, s45 12015; GFX9-NEXT: v_writelane_b32 v40, s31, 27 12016; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 12017; GFX9-NEXT: s_getpc_b64 s[34:35] 12018; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 12019; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 12020; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 12021; GFX9-NEXT: v_readlane_b32 s31, v40, 27 12022; GFX9-NEXT: v_readlane_b32 s30, v40, 26 12023; GFX9-NEXT: v_readlane_b32 s29, v40, 25 12024; GFX9-NEXT: v_readlane_b32 s28, v40, 24 12025; GFX9-NEXT: v_readlane_b32 s27, v40, 23 12026; GFX9-NEXT: v_readlane_b32 s26, v40, 22 12027; GFX9-NEXT: v_readlane_b32 s25, v40, 21 12028; GFX9-NEXT: v_readlane_b32 s24, v40, 20 12029; GFX9-NEXT: v_readlane_b32 s23, v40, 19 12030; GFX9-NEXT: v_readlane_b32 s22, v40, 18 12031; GFX9-NEXT: v_readlane_b32 s21, v40, 17 12032; GFX9-NEXT: v_readlane_b32 s20, v40, 16 12033; GFX9-NEXT: v_readlane_b32 s19, v40, 15 12034; GFX9-NEXT: v_readlane_b32 s18, v40, 14 12035; GFX9-NEXT: v_readlane_b32 s17, v40, 13 12036; GFX9-NEXT: v_readlane_b32 s16, v40, 12 12037; GFX9-NEXT: v_readlane_b32 s15, v40, 11 12038; GFX9-NEXT: v_readlane_b32 s14, v40, 10 12039; GFX9-NEXT: v_readlane_b32 s13, v40, 9 12040; GFX9-NEXT: v_readlane_b32 s12, v40, 8 12041; GFX9-NEXT: v_readlane_b32 s11, v40, 7 12042; GFX9-NEXT: v_readlane_b32 s10, v40, 6 12043; GFX9-NEXT: v_readlane_b32 s9, v40, 5 12044; GFX9-NEXT: v_readlane_b32 s8, v40, 4 12045; GFX9-NEXT: v_readlane_b32 s7, v40, 3 12046; GFX9-NEXT: v_readlane_b32 s6, v40, 2 12047; GFX9-NEXT: v_readlane_b32 s5, v40, 1 12048; GFX9-NEXT: v_readlane_b32 s4, v40, 0 12049; GFX9-NEXT: s_addk_i32 s32, 0xfc00 12050; GFX9-NEXT: v_readlane_b32 s33, v40, 28 12051; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12052; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 12053; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12054; GFX9-NEXT: s_waitcnt vmcnt(0) 12055; 
GFX9-NEXT: s_setpc_b64 s[30:31] 12056; 12057; GFX10-LABEL: test_call_external_void_func_v32i32_inreg: 12058; GFX10: ; %bb.0: 12059; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12060; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 12061; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12062; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 12063; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12064; GFX10-NEXT: s_mov_b32 exec_lo, s34 12065; GFX10-NEXT: v_writelane_b32 v40, s33, 28 12066; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 12067; GFX10-NEXT: s_mov_b32 s33, s32 12068; GFX10-NEXT: s_addk_i32 s32, 0x200 12069; GFX10-NEXT: v_writelane_b32 v40, s4, 0 12070; GFX10-NEXT: v_writelane_b32 v40, s5, 1 12071; GFX10-NEXT: v_writelane_b32 v40, s6, 2 12072; GFX10-NEXT: v_writelane_b32 v40, s7, 3 12073; GFX10-NEXT: v_writelane_b32 v40, s8, 4 12074; GFX10-NEXT: v_writelane_b32 v40, s9, 5 12075; GFX10-NEXT: v_writelane_b32 v40, s10, 6 12076; GFX10-NEXT: v_writelane_b32 v40, s11, 7 12077; GFX10-NEXT: v_writelane_b32 v40, s12, 8 12078; GFX10-NEXT: v_writelane_b32 v40, s13, 9 12079; GFX10-NEXT: v_writelane_b32 v40, s14, 10 12080; GFX10-NEXT: v_writelane_b32 v40, s15, 11 12081; GFX10-NEXT: v_writelane_b32 v40, s16, 12 12082; GFX10-NEXT: v_writelane_b32 v40, s17, 13 12083; GFX10-NEXT: v_writelane_b32 v40, s18, 14 12084; GFX10-NEXT: v_writelane_b32 v40, s19, 15 12085; GFX10-NEXT: s_waitcnt lgkmcnt(0) 12086; GFX10-NEXT: s_clause 0x1 12087; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 12088; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 12089; GFX10-NEXT: s_getpc_b64 s[34:35] 12090; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 12091; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 12092; GFX10-NEXT: v_writelane_b32 v40, s20, 16 12093; GFX10-NEXT: v_writelane_b32 v40, s21, 17 12094; GFX10-NEXT: v_writelane_b32 v40, s22, 18 12095; GFX10-NEXT: s_waitcnt lgkmcnt(0) 12096; GFX10-NEXT: v_mov_b32_e32 v0, s46 
12097; GFX10-NEXT: v_writelane_b32 v40, s23, 19 12098; GFX10-NEXT: v_mov_b32_e32 v1, s47 12099; GFX10-NEXT: v_mov_b32_e32 v2, s48 12100; GFX10-NEXT: v_mov_b32_e32 v3, s49 12101; GFX10-NEXT: s_mov_b32 s20, s36 12102; GFX10-NEXT: v_writelane_b32 v40, s24, 20 12103; GFX10-NEXT: s_mov_b32 s21, s37 12104; GFX10-NEXT: s_mov_b32 s22, s38 12105; GFX10-NEXT: s_mov_b32 s23, s39 12106; GFX10-NEXT: s_mov_b32 s24, s40 12107; GFX10-NEXT: v_writelane_b32 v40, s25, 21 12108; GFX10-NEXT: s_mov_b32 s25, s41 12109; GFX10-NEXT: v_mov_b32_e32 v4, s50 12110; GFX10-NEXT: v_mov_b32_e32 v5, s51 12111; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 12112; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 12113; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 12114; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 12115; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 12116; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 12117; GFX10-NEXT: v_writelane_b32 v40, s26, 22 12118; GFX10-NEXT: s_mov_b32 s26, s42 12119; GFX10-NEXT: v_writelane_b32 v40, s27, 23 12120; GFX10-NEXT: s_mov_b32 s27, s43 12121; GFX10-NEXT: v_writelane_b32 v40, s28, 24 12122; GFX10-NEXT: s_mov_b32 s28, s44 12123; GFX10-NEXT: v_writelane_b32 v40, s29, 25 12124; GFX10-NEXT: s_mov_b32 s29, s45 12125; GFX10-NEXT: v_writelane_b32 v40, s30, 26 12126; GFX10-NEXT: v_writelane_b32 v40, s31, 27 12127; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 12128; GFX10-NEXT: v_readlane_b32 s31, v40, 27 12129; GFX10-NEXT: v_readlane_b32 s30, v40, 26 12130; GFX10-NEXT: v_readlane_b32 s29, v40, 25 12131; GFX10-NEXT: v_readlane_b32 s28, v40, 24 12132; GFX10-NEXT: v_readlane_b32 s27, v40, 23 12133; GFX10-NEXT: v_readlane_b32 s26, v40, 22 12134; GFX10-NEXT: v_readlane_b32 s25, v40, 21 12135; GFX10-NEXT: v_readlane_b32 s24, v40, 20 12136; GFX10-NEXT: v_readlane_b32 s23, v40, 19 12137; GFX10-NEXT: v_readlane_b32 s22, v40, 18 12138; GFX10-NEXT: v_readlane_b32 s21, v40, 17 12139; 
GFX10-NEXT: v_readlane_b32 s20, v40, 16 12140; GFX10-NEXT: v_readlane_b32 s19, v40, 15 12141; GFX10-NEXT: v_readlane_b32 s18, v40, 14 12142; GFX10-NEXT: v_readlane_b32 s17, v40, 13 12143; GFX10-NEXT: v_readlane_b32 s16, v40, 12 12144; GFX10-NEXT: v_readlane_b32 s15, v40, 11 12145; GFX10-NEXT: v_readlane_b32 s14, v40, 10 12146; GFX10-NEXT: v_readlane_b32 s13, v40, 9 12147; GFX10-NEXT: v_readlane_b32 s12, v40, 8 12148; GFX10-NEXT: v_readlane_b32 s11, v40, 7 12149; GFX10-NEXT: v_readlane_b32 s10, v40, 6 12150; GFX10-NEXT: v_readlane_b32 s9, v40, 5 12151; GFX10-NEXT: v_readlane_b32 s8, v40, 4 12152; GFX10-NEXT: v_readlane_b32 s7, v40, 3 12153; GFX10-NEXT: v_readlane_b32 s6, v40, 2 12154; GFX10-NEXT: v_readlane_b32 s5, v40, 1 12155; GFX10-NEXT: v_readlane_b32 s4, v40, 0 12156; GFX10-NEXT: s_addk_i32 s32, 0xfe00 12157; GFX10-NEXT: v_readlane_b32 s33, v40, 28 12158; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12159; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 12160; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12161; GFX10-NEXT: s_mov_b32 exec_lo, s34 12162; GFX10-NEXT: s_waitcnt vmcnt(0) 12163; GFX10-NEXT: s_setpc_b64 s[30:31] 12164; 12165; GFX11-LABEL: test_call_external_void_func_v32i32_inreg: 12166; GFX11: ; %bb.0: 12167; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12168; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 12169; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12170; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 12171; GFX11-NEXT: s_mov_b32 exec_lo, s0 12172; GFX11-NEXT: v_writelane_b32 v40, s33, 28 12173; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 12174; GFX11-NEXT: s_mov_b32 s33, s32 12175; GFX11-NEXT: s_add_i32 s32, s32, 16 12176; GFX11-NEXT: v_writelane_b32 v40, s4, 0 12177; GFX11-NEXT: v_writelane_b32 v40, s5, 1 12178; GFX11-NEXT: v_writelane_b32 v40, s6, 2 12179; GFX11-NEXT: v_writelane_b32 v40, s7, 3 12180; GFX11-NEXT: v_writelane_b32 v40, s8, 4 12181; GFX11-NEXT: v_writelane_b32 v40, s9, 5 12182; GFX11-NEXT: v_writelane_b32 
v40, s10, 6 12183; GFX11-NEXT: v_writelane_b32 v40, s11, 7 12184; GFX11-NEXT: v_writelane_b32 v40, s12, 8 12185; GFX11-NEXT: v_writelane_b32 v40, s13, 9 12186; GFX11-NEXT: v_writelane_b32 v40, s14, 10 12187; GFX11-NEXT: v_writelane_b32 v40, s15, 11 12188; GFX11-NEXT: v_writelane_b32 v40, s16, 12 12189; GFX11-NEXT: v_writelane_b32 v40, s17, 13 12190; GFX11-NEXT: v_writelane_b32 v40, s18, 14 12191; GFX11-NEXT: v_writelane_b32 v40, s19, 15 12192; GFX11-NEXT: s_waitcnt lgkmcnt(0) 12193; GFX11-NEXT: s_clause 0x1 12194; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 12195; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 12196; GFX11-NEXT: s_getpc_b64 s[0:1] 12197; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4 12198; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12 12199; GFX11-NEXT: v_writelane_b32 v40, s20, 16 12200; GFX11-NEXT: v_writelane_b32 v40, s21, 17 12201; GFX11-NEXT: v_writelane_b32 v40, s22, 18 12202; GFX11-NEXT: s_waitcnt lgkmcnt(0) 12203; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51 12204; GFX11-NEXT: v_writelane_b32 v40, s23, 19 12205; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47 12206; GFX11-NEXT: v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49 12207; GFX11-NEXT: v_writelane_b32 v40, s24, 20 12208; GFX11-NEXT: s_mov_b32 s20, s36 12209; GFX11-NEXT: s_mov_b32 s21, s37 12210; GFX11-NEXT: s_mov_b32 s22, s38 12211; GFX11-NEXT: s_mov_b32 s23, s39 12212; GFX11-NEXT: v_writelane_b32 v40, s25, 21 12213; GFX11-NEXT: s_mov_b32 s24, s40 12214; GFX11-NEXT: s_mov_b32 s25, s41 12215; GFX11-NEXT: s_clause 0x1 12216; GFX11-NEXT: scratch_store_b64 off, v[4:5], s32 offset:16 12217; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 12218; GFX11-NEXT: v_writelane_b32 v40, s26, 22 12219; GFX11-NEXT: s_mov_b32 s26, s42 12220; GFX11-NEXT: v_writelane_b32 v40, s27, 23 12221; GFX11-NEXT: s_mov_b32 s27, s43 12222; GFX11-NEXT: v_writelane_b32 v40, s28, 24 12223; GFX11-NEXT: s_mov_b32 s28, s44 12224; 
GFX11-NEXT: v_writelane_b32 v40, s29, 25 12225; GFX11-NEXT: s_mov_b32 s29, s45 12226; GFX11-NEXT: v_writelane_b32 v40, s30, 26 12227; GFX11-NEXT: v_writelane_b32 v40, s31, 27 12228; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 12229; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 12230; GFX11-NEXT: v_readlane_b32 s31, v40, 27 12231; GFX11-NEXT: v_readlane_b32 s30, v40, 26 12232; GFX11-NEXT: v_readlane_b32 s29, v40, 25 12233; GFX11-NEXT: v_readlane_b32 s28, v40, 24 12234; GFX11-NEXT: v_readlane_b32 s27, v40, 23 12235; GFX11-NEXT: v_readlane_b32 s26, v40, 22 12236; GFX11-NEXT: v_readlane_b32 s25, v40, 21 12237; GFX11-NEXT: v_readlane_b32 s24, v40, 20 12238; GFX11-NEXT: v_readlane_b32 s23, v40, 19 12239; GFX11-NEXT: v_readlane_b32 s22, v40, 18 12240; GFX11-NEXT: v_readlane_b32 s21, v40, 17 12241; GFX11-NEXT: v_readlane_b32 s20, v40, 16 12242; GFX11-NEXT: v_readlane_b32 s19, v40, 15 12243; GFX11-NEXT: v_readlane_b32 s18, v40, 14 12244; GFX11-NEXT: v_readlane_b32 s17, v40, 13 12245; GFX11-NEXT: v_readlane_b32 s16, v40, 12 12246; GFX11-NEXT: v_readlane_b32 s15, v40, 11 12247; GFX11-NEXT: v_readlane_b32 s14, v40, 10 12248; GFX11-NEXT: v_readlane_b32 s13, v40, 9 12249; GFX11-NEXT: v_readlane_b32 s12, v40, 8 12250; GFX11-NEXT: v_readlane_b32 s11, v40, 7 12251; GFX11-NEXT: v_readlane_b32 s10, v40, 6 12252; GFX11-NEXT: v_readlane_b32 s9, v40, 5 12253; GFX11-NEXT: v_readlane_b32 s8, v40, 4 12254; GFX11-NEXT: v_readlane_b32 s7, v40, 3 12255; GFX11-NEXT: v_readlane_b32 s6, v40, 2 12256; GFX11-NEXT: v_readlane_b32 s5, v40, 1 12257; GFX11-NEXT: v_readlane_b32 s4, v40, 0 12258; GFX11-NEXT: s_add_i32 s32, s32, -16 12259; GFX11-NEXT: v_readlane_b32 s33, v40, 28 12260; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12261; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 12262; GFX11-NEXT: s_mov_b32 exec_lo, s0 12263; GFX11-NEXT: s_waitcnt vmcnt(0) 12264; GFX11-NEXT: s_setpc_b64 s[30:31] 12265; 12266; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg: 12267; 
GFX10-SCRATCH: ; %bb.0: 12268; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12269; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 12270; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12271; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 12272; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12273; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12274; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 12275; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 12276; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 12277; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 12278; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 12279; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 12280; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 12281; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 12282; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 12283; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 12284; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 12285; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 12286; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 12287; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 12288; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 12289; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 12290; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 12291; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 12292; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 12293; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 12294; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 12295; GFX10-SCRATCH-NEXT: s_clause 0x1 12296; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 12297; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 12298; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 12299; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4 12300; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12 12301; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 12302; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s21, 17 12303; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 12304; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 12305; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 12306; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 12307; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 12308; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 12309; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 12310; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 12311; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 12312; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 12313; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 12314; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 12315; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 12316; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 12317; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 12318; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 12319; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 12320; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 12321; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 12322; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 12323; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 12324; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 12325; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 12326; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 12327; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 12328; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 12329; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 12330; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 12331; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 12332; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 12333; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 12334; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 12335; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 12336; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 12337; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 12338; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22 12339; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21 12340; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20 12341; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19 12342; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18 12343; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17 12344; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16 12345; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 12346; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 12347; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 12348; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 12349; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 12350; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 12351; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 12352; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 12353; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 12354; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 12355; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 12356; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 12357; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 12358; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 12359; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 12360; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 12361; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 12362; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 12363; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12364; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 12365; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12366; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12367; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 12368; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 12369 %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef 12370 %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr 12371 call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val) 12372 ret void 12373} 12374 12375define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { 12376; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg: 12377; GFX9: ; %bb.0: 
12378; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12379; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12380; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 12381; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12382; GFX9-NEXT: v_writelane_b32 v40, s33, 28 12383; GFX9-NEXT: v_writelane_b32 v40, s4, 0 12384; GFX9-NEXT: v_writelane_b32 v40, s5, 1 12385; GFX9-NEXT: v_writelane_b32 v40, s6, 2 12386; GFX9-NEXT: v_writelane_b32 v40, s7, 3 12387; GFX9-NEXT: v_writelane_b32 v40, s8, 4 12388; GFX9-NEXT: v_writelane_b32 v40, s9, 5 12389; GFX9-NEXT: v_writelane_b32 v40, s10, 6 12390; GFX9-NEXT: v_writelane_b32 v40, s11, 7 12391; GFX9-NEXT: v_writelane_b32 v40, s12, 8 12392; GFX9-NEXT: v_writelane_b32 v40, s13, 9 12393; GFX9-NEXT: v_writelane_b32 v40, s14, 10 12394; GFX9-NEXT: v_writelane_b32 v40, s15, 11 12395; GFX9-NEXT: v_writelane_b32 v40, s16, 12 12396; GFX9-NEXT: v_writelane_b32 v40, s17, 13 12397; GFX9-NEXT: v_writelane_b32 v40, s18, 14 12398; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 12399; GFX9-NEXT: v_writelane_b32 v40, s19, 15 12400; GFX9-NEXT: v_writelane_b32 v40, s20, 16 12401; GFX9-NEXT: v_writelane_b32 v40, s21, 17 12402; GFX9-NEXT: v_writelane_b32 v40, s22, 18 12403; GFX9-NEXT: v_writelane_b32 v40, s23, 19 12404; GFX9-NEXT: s_waitcnt lgkmcnt(0) 12405; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0 12406; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 12407; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 12408; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 12409; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 12410; GFX9-NEXT: v_writelane_b32 v40, s24, 20 12411; GFX9-NEXT: v_writelane_b32 v40, s25, 21 12412; GFX9-NEXT: s_mov_b32 s33, s32 12413; GFX9-NEXT: s_addk_i32 s32, 0x400 12414; GFX9-NEXT: v_writelane_b32 v40, s26, 22 12415; GFX9-NEXT: s_waitcnt lgkmcnt(0) 12416; GFX9-NEXT: v_mov_b32_e32 v0, s52 12417; GFX9-NEXT: v_writelane_b32 v40, s27, 23 12418; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 12419; GFX9-NEXT: 
v_mov_b32_e32 v0, s46 12420; GFX9-NEXT: v_writelane_b32 v40, s28, 24 12421; GFX9-NEXT: v_mov_b32_e32 v1, s47 12422; GFX9-NEXT: v_mov_b32_e32 v2, s48 12423; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 12424; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 12425; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 12426; GFX9-NEXT: v_mov_b32_e32 v0, s49 12427; GFX9-NEXT: v_writelane_b32 v40, s29, 25 12428; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 12429; GFX9-NEXT: v_mov_b32_e32 v0, s50 12430; GFX9-NEXT: v_writelane_b32 v40, s30, 26 12431; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 12432; GFX9-NEXT: v_mov_b32_e32 v0, s51 12433; GFX9-NEXT: s_mov_b32 s20, s36 12434; GFX9-NEXT: s_mov_b32 s21, s37 12435; GFX9-NEXT: s_mov_b32 s22, s38 12436; GFX9-NEXT: s_mov_b32 s23, s39 12437; GFX9-NEXT: s_mov_b32 s24, s40 12438; GFX9-NEXT: s_mov_b32 s25, s41 12439; GFX9-NEXT: s_mov_b32 s26, s42 12440; GFX9-NEXT: s_mov_b32 s27, s43 12441; GFX9-NEXT: s_mov_b32 s28, s44 12442; GFX9-NEXT: s_mov_b32 s29, s45 12443; GFX9-NEXT: v_writelane_b32 v40, s31, 27 12444; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 12445; GFX9-NEXT: s_getpc_b64 s[34:35] 12446; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 12447; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 12448; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 12449; GFX9-NEXT: v_readlane_b32 s31, v40, 27 12450; GFX9-NEXT: v_readlane_b32 s30, v40, 26 12451; GFX9-NEXT: v_readlane_b32 s29, v40, 25 12452; GFX9-NEXT: v_readlane_b32 s28, v40, 24 12453; GFX9-NEXT: v_readlane_b32 s27, v40, 23 12454; GFX9-NEXT: v_readlane_b32 s26, v40, 22 12455; GFX9-NEXT: v_readlane_b32 s25, v40, 21 12456; GFX9-NEXT: v_readlane_b32 s24, v40, 20 12457; GFX9-NEXT: v_readlane_b32 s23, v40, 19 12458; GFX9-NEXT: v_readlane_b32 s22, v40, 18 12459; GFX9-NEXT: v_readlane_b32 s21, v40, 17 12460; GFX9-NEXT: v_readlane_b32 s20, v40, 16 12461; 
GFX9-NEXT: v_readlane_b32 s19, v40, 15 12462; GFX9-NEXT: v_readlane_b32 s18, v40, 14 12463; GFX9-NEXT: v_readlane_b32 s17, v40, 13 12464; GFX9-NEXT: v_readlane_b32 s16, v40, 12 12465; GFX9-NEXT: v_readlane_b32 s15, v40, 11 12466; GFX9-NEXT: v_readlane_b32 s14, v40, 10 12467; GFX9-NEXT: v_readlane_b32 s13, v40, 9 12468; GFX9-NEXT: v_readlane_b32 s12, v40, 8 12469; GFX9-NEXT: v_readlane_b32 s11, v40, 7 12470; GFX9-NEXT: v_readlane_b32 s10, v40, 6 12471; GFX9-NEXT: v_readlane_b32 s9, v40, 5 12472; GFX9-NEXT: v_readlane_b32 s8, v40, 4 12473; GFX9-NEXT: v_readlane_b32 s7, v40, 3 12474; GFX9-NEXT: v_readlane_b32 s6, v40, 2 12475; GFX9-NEXT: v_readlane_b32 s5, v40, 1 12476; GFX9-NEXT: v_readlane_b32 s4, v40, 0 12477; GFX9-NEXT: s_addk_i32 s32, 0xfc00 12478; GFX9-NEXT: v_readlane_b32 s33, v40, 28 12479; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12480; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 12481; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12482; GFX9-NEXT: s_waitcnt vmcnt(0) 12483; GFX9-NEXT: s_setpc_b64 s[30:31] 12484; 12485; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg: 12486; GFX10: ; %bb.0: 12487; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12488; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 12489; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12490; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 12491; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12492; GFX10-NEXT: s_mov_b32 exec_lo, s34 12493; GFX10-NEXT: v_writelane_b32 v40, s33, 28 12494; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 12495; GFX10-NEXT: s_mov_b32 s33, s32 12496; GFX10-NEXT: s_addk_i32 s32, 0x200 12497; GFX10-NEXT: v_writelane_b32 v40, s4, 0 12498; GFX10-NEXT: v_writelane_b32 v40, s5, 1 12499; GFX10-NEXT: v_writelane_b32 v40, s6, 2 12500; GFX10-NEXT: v_writelane_b32 v40, s7, 3 12501; GFX10-NEXT: v_writelane_b32 v40, s8, 4 12502; GFX10-NEXT: v_writelane_b32 v40, s9, 5 12503; GFX10-NEXT: v_writelane_b32 v40, s10, 6 12504; GFX10-NEXT: 
v_writelane_b32 v40, s11, 7 12505; GFX10-NEXT: v_writelane_b32 v40, s12, 8 12506; GFX10-NEXT: v_writelane_b32 v40, s13, 9 12507; GFX10-NEXT: v_writelane_b32 v40, s14, 10 12508; GFX10-NEXT: v_writelane_b32 v40, s15, 11 12509; GFX10-NEXT: v_writelane_b32 v40, s16, 12 12510; GFX10-NEXT: v_writelane_b32 v40, s17, 13 12511; GFX10-NEXT: v_writelane_b32 v40, s18, 14 12512; GFX10-NEXT: v_writelane_b32 v40, s19, 15 12513; GFX10-NEXT: s_waitcnt lgkmcnt(0) 12514; GFX10-NEXT: s_clause 0x2 12515; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 12516; GFX10-NEXT: ; meta instruction 12517; GFX10-NEXT: ; meta instruction 12518; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 12519; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 12520; GFX10-NEXT: s_getpc_b64 s[34:35] 12521; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 12522; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 12523; GFX10-NEXT: v_writelane_b32 v40, s20, 16 12524; GFX10-NEXT: v_writelane_b32 v40, s21, 17 12525; GFX10-NEXT: v_writelane_b32 v40, s22, 18 12526; GFX10-NEXT: s_waitcnt lgkmcnt(0) 12527; GFX10-NEXT: v_mov_b32_e32 v0, s52 12528; GFX10-NEXT: v_mov_b32_e32 v1, s47 12529; GFX10-NEXT: v_writelane_b32 v40, s23, 19 12530; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 12531; GFX10-NEXT: v_mov_b32_e32 v0, s46 12532; GFX10-NEXT: v_mov_b32_e32 v2, s48 12533; GFX10-NEXT: v_mov_b32_e32 v3, s49 12534; GFX10-NEXT: v_writelane_b32 v40, s24, 20 12535; GFX10-NEXT: s_mov_b32 s20, s36 12536; GFX10-NEXT: s_mov_b32 s21, s37 12537; GFX10-NEXT: s_mov_b32 s22, s38 12538; GFX10-NEXT: s_mov_b32 s23, s39 12539; GFX10-NEXT: v_writelane_b32 v40, s25, 21 12540; GFX10-NEXT: s_mov_b32 s24, s40 12541; GFX10-NEXT: s_mov_b32 s25, s41 12542; GFX10-NEXT: v_mov_b32_e32 v4, s50 12543; GFX10-NEXT: v_mov_b32_e32 v5, s51 12544; GFX10-NEXT: v_writelane_b32 v40, s26, 22 12545; GFX10-NEXT: s_mov_b32 s26, s42 12546; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 
12547; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 12548; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 12549; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 12550; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 12551; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 12552; GFX10-NEXT: v_writelane_b32 v40, s27, 23 12553; GFX10-NEXT: s_mov_b32 s27, s43 12554; GFX10-NEXT: v_writelane_b32 v40, s28, 24 12555; GFX10-NEXT: s_mov_b32 s28, s44 12556; GFX10-NEXT: v_writelane_b32 v40, s29, 25 12557; GFX10-NEXT: s_mov_b32 s29, s45 12558; GFX10-NEXT: v_writelane_b32 v40, s30, 26 12559; GFX10-NEXT: v_writelane_b32 v40, s31, 27 12560; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 12561; GFX10-NEXT: v_readlane_b32 s31, v40, 27 12562; GFX10-NEXT: v_readlane_b32 s30, v40, 26 12563; GFX10-NEXT: v_readlane_b32 s29, v40, 25 12564; GFX10-NEXT: v_readlane_b32 s28, v40, 24 12565; GFX10-NEXT: v_readlane_b32 s27, v40, 23 12566; GFX10-NEXT: v_readlane_b32 s26, v40, 22 12567; GFX10-NEXT: v_readlane_b32 s25, v40, 21 12568; GFX10-NEXT: v_readlane_b32 s24, v40, 20 12569; GFX10-NEXT: v_readlane_b32 s23, v40, 19 12570; GFX10-NEXT: v_readlane_b32 s22, v40, 18 12571; GFX10-NEXT: v_readlane_b32 s21, v40, 17 12572; GFX10-NEXT: v_readlane_b32 s20, v40, 16 12573; GFX10-NEXT: v_readlane_b32 s19, v40, 15 12574; GFX10-NEXT: v_readlane_b32 s18, v40, 14 12575; GFX10-NEXT: v_readlane_b32 s17, v40, 13 12576; GFX10-NEXT: v_readlane_b32 s16, v40, 12 12577; GFX10-NEXT: v_readlane_b32 s15, v40, 11 12578; GFX10-NEXT: v_readlane_b32 s14, v40, 10 12579; GFX10-NEXT: v_readlane_b32 s13, v40, 9 12580; GFX10-NEXT: v_readlane_b32 s12, v40, 8 12581; GFX10-NEXT: v_readlane_b32 s11, v40, 7 12582; GFX10-NEXT: v_readlane_b32 s10, v40, 6 12583; GFX10-NEXT: v_readlane_b32 s9, v40, 5 12584; GFX10-NEXT: v_readlane_b32 s8, v40, 4 12585; GFX10-NEXT: v_readlane_b32 s7, v40, 3 12586; GFX10-NEXT: v_readlane_b32 s6, v40, 2 12587; GFX10-NEXT: v_readlane_b32 s5, 
v40, 1 12588; GFX10-NEXT: v_readlane_b32 s4, v40, 0 12589; GFX10-NEXT: s_addk_i32 s32, 0xfe00 12590; GFX10-NEXT: v_readlane_b32 s33, v40, 28 12591; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12592; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 12593; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12594; GFX10-NEXT: s_mov_b32 exec_lo, s34 12595; GFX10-NEXT: s_waitcnt vmcnt(0) 12596; GFX10-NEXT: s_setpc_b64 s[30:31] 12597; 12598; GFX11-LABEL: test_call_external_void_func_v32i32_i32_inreg: 12599; GFX11: ; %bb.0: 12600; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12601; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 12602; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12603; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 12604; GFX11-NEXT: s_mov_b32 exec_lo, s0 12605; GFX11-NEXT: v_writelane_b32 v40, s33, 28 12606; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 12607; GFX11-NEXT: s_mov_b32 s33, s32 12608; GFX11-NEXT: s_add_i32 s32, s32, 16 12609; GFX11-NEXT: v_writelane_b32 v40, s4, 0 12610; GFX11-NEXT: v_writelane_b32 v40, s5, 1 12611; GFX11-NEXT: v_writelane_b32 v40, s6, 2 12612; GFX11-NEXT: v_writelane_b32 v40, s7, 3 12613; GFX11-NEXT: v_writelane_b32 v40, s8, 4 12614; GFX11-NEXT: v_writelane_b32 v40, s9, 5 12615; GFX11-NEXT: v_writelane_b32 v40, s10, 6 12616; GFX11-NEXT: v_writelane_b32 v40, s11, 7 12617; GFX11-NEXT: v_writelane_b32 v40, s12, 8 12618; GFX11-NEXT: v_writelane_b32 v40, s13, 9 12619; GFX11-NEXT: v_writelane_b32 v40, s14, 10 12620; GFX11-NEXT: v_writelane_b32 v40, s15, 11 12621; GFX11-NEXT: v_writelane_b32 v40, s16, 12 12622; GFX11-NEXT: v_writelane_b32 v40, s17, 13 12623; GFX11-NEXT: v_writelane_b32 v40, s18, 14 12624; GFX11-NEXT: v_writelane_b32 v40, s19, 15 12625; GFX11-NEXT: s_waitcnt lgkmcnt(0) 12626; GFX11-NEXT: s_clause 0x2 12627; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 12628; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 12629; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 12630; GFX11-NEXT: s_getpc_b64 s[0:1] 12631; 
GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4 12632; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12 12633; GFX11-NEXT: v_writelane_b32 v40, s20, 16 12634; GFX11-NEXT: v_writelane_b32 v40, s21, 17 12635; GFX11-NEXT: v_writelane_b32 v40, s22, 18 12636; GFX11-NEXT: s_waitcnt lgkmcnt(0) 12637; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51 12638; GFX11-NEXT: v_writelane_b32 v40, s23, 19 12639; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47 12640; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49 12641; GFX11-NEXT: v_writelane_b32 v40, s24, 20 12642; GFX11-NEXT: v_mov_b32_e32 v2, s48 12643; GFX11-NEXT: s_mov_b32 s20, s36 12644; GFX11-NEXT: s_mov_b32 s21, s37 12645; GFX11-NEXT: s_mov_b32 s22, s38 12646; GFX11-NEXT: v_writelane_b32 v40, s25, 21 12647; GFX11-NEXT: s_mov_b32 s23, s39 12648; GFX11-NEXT: s_mov_b32 s24, s40 12649; GFX11-NEXT: s_mov_b32 s25, s41 12650; GFX11-NEXT: s_clause 0x2 12651; GFX11-NEXT: scratch_store_b32 off, v6, s32 offset:24 12652; GFX11-NEXT: scratch_store_b64 off, v[4:5], s32 offset:16 12653; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 12654; GFX11-NEXT: v_writelane_b32 v40, s26, 22 12655; GFX11-NEXT: s_mov_b32 s26, s42 12656; GFX11-NEXT: v_writelane_b32 v40, s27, 23 12657; GFX11-NEXT: s_mov_b32 s27, s43 12658; GFX11-NEXT: v_writelane_b32 v40, s28, 24 12659; GFX11-NEXT: s_mov_b32 s28, s44 12660; GFX11-NEXT: v_writelane_b32 v40, s29, 25 12661; GFX11-NEXT: s_mov_b32 s29, s45 12662; GFX11-NEXT: v_writelane_b32 v40, s30, 26 12663; GFX11-NEXT: v_writelane_b32 v40, s31, 27 12664; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 12665; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 12666; GFX11-NEXT: v_readlane_b32 s31, v40, 27 12667; GFX11-NEXT: v_readlane_b32 s30, v40, 26 12668; GFX11-NEXT: v_readlane_b32 s29, v40, 25 12669; GFX11-NEXT: v_readlane_b32 s28, v40, 24 12670; GFX11-NEXT: v_readlane_b32 s27, v40, 23 12671; GFX11-NEXT: v_readlane_b32 s26, v40, 22 
12672; GFX11-NEXT: v_readlane_b32 s25, v40, 21 12673; GFX11-NEXT: v_readlane_b32 s24, v40, 20 12674; GFX11-NEXT: v_readlane_b32 s23, v40, 19 12675; GFX11-NEXT: v_readlane_b32 s22, v40, 18 12676; GFX11-NEXT: v_readlane_b32 s21, v40, 17 12677; GFX11-NEXT: v_readlane_b32 s20, v40, 16 12678; GFX11-NEXT: v_readlane_b32 s19, v40, 15 12679; GFX11-NEXT: v_readlane_b32 s18, v40, 14 12680; GFX11-NEXT: v_readlane_b32 s17, v40, 13 12681; GFX11-NEXT: v_readlane_b32 s16, v40, 12 12682; GFX11-NEXT: v_readlane_b32 s15, v40, 11 12683; GFX11-NEXT: v_readlane_b32 s14, v40, 10 12684; GFX11-NEXT: v_readlane_b32 s13, v40, 9 12685; GFX11-NEXT: v_readlane_b32 s12, v40, 8 12686; GFX11-NEXT: v_readlane_b32 s11, v40, 7 12687; GFX11-NEXT: v_readlane_b32 s10, v40, 6 12688; GFX11-NEXT: v_readlane_b32 s9, v40, 5 12689; GFX11-NEXT: v_readlane_b32 s8, v40, 4 12690; GFX11-NEXT: v_readlane_b32 s7, v40, 3 12691; GFX11-NEXT: v_readlane_b32 s6, v40, 2 12692; GFX11-NEXT: v_readlane_b32 s5, v40, 1 12693; GFX11-NEXT: v_readlane_b32 s4, v40, 0 12694; GFX11-NEXT: s_add_i32 s32, s32, -16 12695; GFX11-NEXT: v_readlane_b32 s33, v40, 28 12696; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12697; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 12698; GFX11-NEXT: s_mov_b32 exec_lo, s0 12699; GFX11-NEXT: s_waitcnt vmcnt(0) 12700; GFX11-NEXT: s_setpc_b64 s[30:31] 12701; 12702; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg: 12703; GFX10-SCRATCH: ; %bb.0: 12704; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12705; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 12706; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12707; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 12708; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12709; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12710; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 12711; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 12712; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 12713; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 12714; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 12715; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 12716; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 12717; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 12718; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 12719; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 12720; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 12721; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 12722; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 12723; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 12724; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 12725; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 12726; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 12727; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 12728; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 12729; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 12730; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 12731; GFX10-SCRATCH-NEXT: s_clause 0x2 12732; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0 12733; GFX10-SCRATCH-NEXT: ; meta instruction 12734; GFX10-SCRATCH-NEXT: ; meta instruction 12735; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 12736; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 12737; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 12738; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4 12739; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12 12740; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 12741; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 12742; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 12743; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) 12744; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2 12745; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 12746; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 12747; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 12748; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 12749; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 12750; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 12751; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 12752; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 12753; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 12754; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 12755; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 12756; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 12757; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 12758; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 12759; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 12760; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24 12761; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 12762; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 12763; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 12764; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 12765; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 12766; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 12767; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 12768; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 12769; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 12770; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 12771; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 12772; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 12773; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 12774; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 12775; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 12776; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 12777; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 12778; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 12779; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22 12780; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21 12781; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20 12782; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19 12783; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18 12784; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17 12785; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16 12786; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 12787; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 12788; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 12789; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 12790; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 12791; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 12792; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 12793; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 12794; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 12795; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 12796; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 12797; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 12798; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 12799; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 12800; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 12801; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 12802; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 12803; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 12804; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12805; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 12806; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12807; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12808; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 12809; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 12810 %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef 12811 %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0 12812 %val1 = load i32, i32 addrspace(4)* undef 12813 call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1) 12814 ret void 12815} 12816 12817define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { 12818; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: 12819; GFX9: ; %bb.0: ; %entry 12820; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12821; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12822; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte 
Folded Spill 12823; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12824; GFX9-NEXT: v_writelane_b32 v40, s33, 2 12825; GFX9-NEXT: s_mov_b32 s33, s32 12826; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 12827; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 12828; GFX9-NEXT: s_addk_i32 s32, 0x400 12829; GFX9-NEXT: v_writelane_b32 v40, s30, 0 12830; GFX9-NEXT: v_writelane_b32 v40, s31, 1 12831; GFX9-NEXT: s_getpc_b64 s[34:35] 12832; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 12833; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 12834; GFX9-NEXT: s_waitcnt vmcnt(1) 12835; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 12836; GFX9-NEXT: s_waitcnt vmcnt(1) 12837; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 12838; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 12839; GFX9-NEXT: v_readlane_b32 s31, v40, 1 12840; GFX9-NEXT: v_readlane_b32 s30, v40, 0 12841; GFX9-NEXT: s_addk_i32 s32, 0xfc00 12842; GFX9-NEXT: v_readlane_b32 s33, v40, 2 12843; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12844; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 12845; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12846; GFX9-NEXT: s_waitcnt vmcnt(0) 12847; GFX9-NEXT: s_setpc_b64 s[30:31] 12848; 12849; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64: 12850; GFX10: ; %bb.0: ; %entry 12851; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12852; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 12853; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12854; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 12855; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12856; GFX10-NEXT: s_mov_b32 exec_lo, s34 12857; GFX10-NEXT: v_writelane_b32 v40, s33, 2 12858; GFX10-NEXT: s_mov_b32 s33, s32 12859; GFX10-NEXT: s_clause 0x1 12860; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 12861; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 12862; GFX10-NEXT: s_addk_i32 s32, 0x200 12863; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 12864; GFX10-NEXT: s_getpc_b64 s[34:35] 12865; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 12866; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 12867; GFX10-NEXT: s_waitcnt vmcnt(1) 12868; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 12869; GFX10-NEXT: s_waitcnt vmcnt(0) 12870; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 12871; GFX10-NEXT: v_writelane_b32 v40, s31, 1 12872; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 12873; GFX10-NEXT: v_readlane_b32 s31, v40, 1 12874; GFX10-NEXT: v_readlane_b32 s30, v40, 0 12875; GFX10-NEXT: s_addk_i32 s32, 0xfe00 12876; GFX10-NEXT: v_readlane_b32 s33, v40, 2 12877; GFX10-NEXT: s_or_saveexec_b32 s34, -1 12878; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 12879; GFX10-NEXT: s_waitcnt_depctr 0xffe3 12880; GFX10-NEXT: s_mov_b32 exec_lo, s34 12881; GFX10-NEXT: s_waitcnt vmcnt(0) 12882; GFX10-NEXT: s_setpc_b64 s[30:31] 12883; 12884; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64: 12885; GFX11: ; %bb.0: ; %entry 12886; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12887; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 12888; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12889; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill 12890; GFX11-NEXT: s_mov_b32 exec_lo, s0 12891; GFX11-NEXT: v_writelane_b32 v40, s33, 2 12892; GFX11-NEXT: s_mov_b32 s33, s32 12893; GFX11-NEXT: s_add_i32 s32, s32, 16 12894; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 12895; GFX11-NEXT: s_getpc_b64 s[0:1] 12896; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 12897; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 12898; GFX11-NEXT: v_writelane_b32 v40, s30, 0 12899; GFX11-NEXT: v_writelane_b32 v40, s31, 1 12900; GFX11-NEXT: s_waitcnt vmcnt(0) 12901; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 12902; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 12903; GFX11-NEXT: v_readlane_b32 s31, 
v40, 1 12904; GFX11-NEXT: v_readlane_b32 s30, v40, 0 12905; GFX11-NEXT: s_add_i32 s32, s32, -16 12906; GFX11-NEXT: v_readlane_b32 s33, v40, 2 12907; GFX11-NEXT: s_or_saveexec_b32 s0, -1 12908; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload 12909; GFX11-NEXT: s_mov_b32 exec_lo, s0 12910; GFX11-NEXT: s_waitcnt vmcnt(0) 12911; GFX11-NEXT: s_setpc_b64 s[30:31] 12912; 12913; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64: 12914; GFX10-SCRATCH: ; %bb.0: ; %entry 12915; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12916; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 12917; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12918; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill 12919; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12920; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12921; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 12922; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 12923; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 12924; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 12925; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 12926; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 12927; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 12928; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 12929; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 12930; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 12931; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 12932; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 12933; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 12934; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 12935; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 12936; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 12937; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 12938; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload 12939; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 12940; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 12941; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 12942; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 12943entry: 12944 call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) 12945 ret void 12946} 12947 12948define amdgpu_gfx void @stack_12xv3i32() #0 { 12949; GFX9-LABEL: stack_12xv3i32: 12950; GFX9: ; %bb.0: ; %entry 12951; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12952; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 12953; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 12954; GFX9-NEXT: s_mov_b64 exec, s[34:35] 12955; GFX9-NEXT: v_writelane_b32 v40, s33, 2 12956; GFX9-NEXT: s_mov_b32 s33, s32 12957; GFX9-NEXT: s_addk_i32 s32, 0x400 12958; GFX9-NEXT: v_mov_b32_e32 v0, 12 12959; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 12960; GFX9-NEXT: v_mov_b32_e32 v0, 13 12961; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 12962; GFX9-NEXT: v_mov_b32_e32 v0, 14 12963; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 12964; GFX9-NEXT: v_mov_b32_e32 v0, 15 12965; GFX9-NEXT: v_writelane_b32 v40, s30, 0 12966; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 12967; GFX9-NEXT: v_mov_b32_e32 v0, 0 12968; GFX9-NEXT: v_mov_b32_e32 v1, 0 12969; GFX9-NEXT: v_mov_b32_e32 v2, 0 12970; GFX9-NEXT: v_mov_b32_e32 v3, 1 12971; GFX9-NEXT: v_mov_b32_e32 v4, 1 12972; GFX9-NEXT: v_mov_b32_e32 v5, 1 12973; GFX9-NEXT: v_mov_b32_e32 v6, 2 12974; GFX9-NEXT: v_mov_b32_e32 v7, 2 12975; GFX9-NEXT: v_mov_b32_e32 v8, 2 12976; GFX9-NEXT: v_mov_b32_e32 v9, 3 12977; GFX9-NEXT: v_mov_b32_e32 v10, 3 12978; GFX9-NEXT: v_mov_b32_e32 v11, 3 12979; GFX9-NEXT: v_mov_b32_e32 v12, 4 12980; GFX9-NEXT: v_mov_b32_e32 v13, 4 12981; GFX9-NEXT: v_mov_b32_e32 v14, 4 12982; GFX9-NEXT: v_mov_b32_e32 v15, 5 12983; GFX9-NEXT: v_mov_b32_e32 v16, 5 12984; GFX9-NEXT: v_mov_b32_e32 v17, 5 12985; GFX9-NEXT: v_mov_b32_e32 v18, 6 12986; GFX9-NEXT: v_mov_b32_e32 v19, 6 12987; GFX9-NEXT: v_mov_b32_e32 v20, 6 
12988; GFX9-NEXT: v_mov_b32_e32 v21, 7 12989; GFX9-NEXT: v_mov_b32_e32 v22, 7 12990; GFX9-NEXT: v_mov_b32_e32 v23, 7 12991; GFX9-NEXT: v_mov_b32_e32 v24, 8 12992; GFX9-NEXT: v_mov_b32_e32 v25, 8 12993; GFX9-NEXT: v_mov_b32_e32 v26, 8 12994; GFX9-NEXT: v_mov_b32_e32 v27, 9 12995; GFX9-NEXT: v_mov_b32_e32 v28, 9 12996; GFX9-NEXT: v_mov_b32_e32 v29, 9 12997; GFX9-NEXT: v_mov_b32_e32 v30, 10 12998; GFX9-NEXT: v_mov_b32_e32 v31, 11 12999; GFX9-NEXT: v_writelane_b32 v40, s31, 1 13000; GFX9-NEXT: s_getpc_b64 s[34:35] 13001; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 13002; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 13003; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 13004; GFX9-NEXT: v_readlane_b32 s31, v40, 1 13005; GFX9-NEXT: v_readlane_b32 s30, v40, 0 13006; GFX9-NEXT: s_addk_i32 s32, 0xfc00 13007; GFX9-NEXT: v_readlane_b32 s33, v40, 2 13008; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 13009; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 13010; GFX9-NEXT: s_mov_b64 exec, s[34:35] 13011; GFX9-NEXT: s_waitcnt vmcnt(0) 13012; GFX9-NEXT: s_setpc_b64 s[30:31] 13013; 13014; GFX10-LABEL: stack_12xv3i32: 13015; GFX10: ; %bb.0: ; %entry 13016; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13017; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 13018; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13019; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 13020; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13021; GFX10-NEXT: s_mov_b32 exec_lo, s34 13022; GFX10-NEXT: v_writelane_b32 v40, s33, 2 13023; GFX10-NEXT: v_mov_b32_e32 v0, 12 13024; GFX10-NEXT: v_mov_b32_e32 v1, 13 13025; GFX10-NEXT: v_mov_b32_e32 v2, 14 13026; GFX10-NEXT: s_mov_b32 s33, s32 13027; GFX10-NEXT: s_addk_i32 s32, 0x200 13028; GFX10-NEXT: v_mov_b32_e32 v3, 15 13029; GFX10-NEXT: v_writelane_b32 v40, s30, 0 13030; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 13031; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 
13032; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 13033; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 13034; GFX10-NEXT: v_mov_b32_e32 v0, 0 13035; GFX10-NEXT: v_mov_b32_e32 v1, 0 13036; GFX10-NEXT: v_mov_b32_e32 v2, 0 13037; GFX10-NEXT: v_mov_b32_e32 v3, 1 13038; GFX10-NEXT: v_mov_b32_e32 v4, 1 13039; GFX10-NEXT: v_mov_b32_e32 v5, 1 13040; GFX10-NEXT: v_mov_b32_e32 v6, 2 13041; GFX10-NEXT: v_mov_b32_e32 v7, 2 13042; GFX10-NEXT: v_mov_b32_e32 v8, 2 13043; GFX10-NEXT: v_mov_b32_e32 v9, 3 13044; GFX10-NEXT: v_mov_b32_e32 v10, 3 13045; GFX10-NEXT: v_mov_b32_e32 v11, 3 13046; GFX10-NEXT: v_mov_b32_e32 v12, 4 13047; GFX10-NEXT: v_mov_b32_e32 v13, 4 13048; GFX10-NEXT: v_mov_b32_e32 v14, 4 13049; GFX10-NEXT: v_mov_b32_e32 v15, 5 13050; GFX10-NEXT: v_mov_b32_e32 v16, 5 13051; GFX10-NEXT: v_mov_b32_e32 v17, 5 13052; GFX10-NEXT: v_mov_b32_e32 v18, 6 13053; GFX10-NEXT: v_mov_b32_e32 v19, 6 13054; GFX10-NEXT: v_mov_b32_e32 v20, 6 13055; GFX10-NEXT: v_mov_b32_e32 v21, 7 13056; GFX10-NEXT: v_mov_b32_e32 v22, 7 13057; GFX10-NEXT: v_mov_b32_e32 v23, 7 13058; GFX10-NEXT: v_mov_b32_e32 v24, 8 13059; GFX10-NEXT: v_mov_b32_e32 v25, 8 13060; GFX10-NEXT: v_mov_b32_e32 v26, 8 13061; GFX10-NEXT: v_mov_b32_e32 v27, 9 13062; GFX10-NEXT: v_mov_b32_e32 v28, 9 13063; GFX10-NEXT: v_mov_b32_e32 v29, 9 13064; GFX10-NEXT: v_mov_b32_e32 v30, 10 13065; GFX10-NEXT: v_mov_b32_e32 v31, 11 13066; GFX10-NEXT: v_writelane_b32 v40, s31, 1 13067; GFX10-NEXT: s_getpc_b64 s[34:35] 13068; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 13069; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 13070; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 13071; GFX10-NEXT: v_readlane_b32 s31, v40, 1 13072; GFX10-NEXT: v_readlane_b32 s30, v40, 0 13073; GFX10-NEXT: s_addk_i32 s32, 0xfe00 13074; GFX10-NEXT: v_readlane_b32 s33, v40, 2 13075; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13076; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte 
Folded Reload 13077; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13078; GFX10-NEXT: s_mov_b32 exec_lo, s34 13079; GFX10-NEXT: s_waitcnt vmcnt(0) 13080; GFX10-NEXT: s_setpc_b64 s[30:31] 13081; 13082; GFX11-LABEL: stack_12xv3i32: 13083; GFX11: ; %bb.0: ; %entry 13084; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13085; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 13086; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13087; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 13088; GFX11-NEXT: s_mov_b32 exec_lo, s0 13089; GFX11-NEXT: v_writelane_b32 v40, s33, 2 13090; GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13 13091; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15 13092; GFX11-NEXT: s_mov_b32 s33, s32 13093; GFX11-NEXT: s_add_i32 s32, s32, 16 13094; GFX11-NEXT: v_writelane_b32 v40, s30, 0 13095; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1 13096; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 13097; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 13098; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1 13099; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2 13100; GFX11-NEXT: v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3 13101; GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3 13102; GFX11-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4 13103; GFX11-NEXT: v_dual_mov_b32 v14, 4 :: v_dual_mov_b32 v15, 5 13104; GFX11-NEXT: v_dual_mov_b32 v16, 5 :: v_dual_mov_b32 v17, 5 13105; GFX11-NEXT: v_dual_mov_b32 v18, 6 :: v_dual_mov_b32 v19, 6 13106; GFX11-NEXT: v_dual_mov_b32 v20, 6 :: v_dual_mov_b32 v21, 7 13107; GFX11-NEXT: v_dual_mov_b32 v22, 7 :: v_dual_mov_b32 v23, 7 13108; GFX11-NEXT: v_dual_mov_b32 v24, 8 :: v_dual_mov_b32 v25, 8 13109; GFX11-NEXT: v_dual_mov_b32 v26, 8 :: v_dual_mov_b32 v27, 9 13110; GFX11-NEXT: v_dual_mov_b32 v28, 9 :: v_dual_mov_b32 v29, 9 13111; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11 13112; GFX11-NEXT: v_writelane_b32 v40, s31, 1 13113; GFX11-NEXT: 
s_getpc_b64 s[0:1] 13114; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 13115; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 13116; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 13117; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 13118; GFX11-NEXT: v_readlane_b32 s31, v40, 1 13119; GFX11-NEXT: v_readlane_b32 s30, v40, 0 13120; GFX11-NEXT: s_add_i32 s32, s32, -16 13121; GFX11-NEXT: v_readlane_b32 s33, v40, 2 13122; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13123; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 13124; GFX11-NEXT: s_mov_b32 exec_lo, s0 13125; GFX11-NEXT: s_waitcnt vmcnt(0) 13126; GFX11-NEXT: s_setpc_b64 s[30:31] 13127; 13128; GFX10-SCRATCH-LABEL: stack_12xv3i32: 13129; GFX10-SCRATCH: ; %bb.0: ; %entry 13130; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13131; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 13132; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13133; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 13134; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13135; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13136; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 13137; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12 13138; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13 13139; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14 13140; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15 13141; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 13142; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 13143; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 13144; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 13145; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 13146; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 13147; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 13148; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 13149; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1 13150; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1 13151; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2 13152; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2 13153; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2 13154; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3 13155; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3 13156; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 3 13157; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 4 13158; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 4 13159; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 4 13160; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 5 13161; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 5 13162; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 5 13163; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 6 13164; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 6 13165; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 6 13166; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 7 13167; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 7 13168; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 7 13169; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 8 13170; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 8 13171; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 8 13172; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 9 13173; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 9 13174; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 9 13175; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 10 13176; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11 13177; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 13178; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 13179; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 13180; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 13181; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 13182; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 13183; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 13184; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 13185; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 13186; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13187; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 13188; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13189; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13190; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 13191; GFX10-SCRATCH-NEXT: 
s_setpc_b64 s[30:31] 13192entry: 13193 call amdgpu_gfx void @external_void_func_12xv3i32( 13194 <3 x i32><i32 0, i32 0, i32 0>, 13195 <3 x i32><i32 1, i32 1, i32 1>, 13196 <3 x i32><i32 2, i32 2, i32 2>, 13197 <3 x i32><i32 3, i32 3, i32 3>, 13198 <3 x i32><i32 4, i32 4, i32 4>, 13199 <3 x i32><i32 5, i32 5, i32 5>, 13200 <3 x i32><i32 6, i32 6, i32 6>, 13201 <3 x i32><i32 7, i32 7, i32 7>, 13202 <3 x i32><i32 8, i32 8, i32 8>, 13203 <3 x i32><i32 9, i32 9, i32 9>, 13204 <3 x i32><i32 10, i32 11, i32 12>, 13205 <3 x i32><i32 13, i32 14, i32 15>) 13206 ret void 13207} 13208 13209define amdgpu_gfx void @stack_8xv5i32() #0 { 13210; GFX9-LABEL: stack_8xv5i32: 13211; GFX9: ; %bb.0: ; %entry 13212; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13213; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 13214; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 13215; GFX9-NEXT: s_mov_b64 exec, s[34:35] 13216; GFX9-NEXT: v_writelane_b32 v40, s33, 2 13217; GFX9-NEXT: s_mov_b32 s33, s32 13218; GFX9-NEXT: s_addk_i32 s32, 0x400 13219; GFX9-NEXT: v_mov_b32_e32 v0, 8 13220; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 13221; GFX9-NEXT: v_mov_b32_e32 v0, 9 13222; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 13223; GFX9-NEXT: v_mov_b32_e32 v0, 10 13224; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 13225; GFX9-NEXT: v_mov_b32_e32 v0, 11 13226; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 13227; GFX9-NEXT: v_mov_b32_e32 v0, 12 13228; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 13229; GFX9-NEXT: v_mov_b32_e32 v0, 13 13230; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 13231; GFX9-NEXT: v_mov_b32_e32 v0, 14 13232; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 13233; GFX9-NEXT: v_mov_b32_e32 v0, 15 13234; GFX9-NEXT: v_writelane_b32 v40, s30, 0 13235; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 13236; GFX9-NEXT: v_mov_b32_e32 v0, 0 13237; GFX9-NEXT: 
v_mov_b32_e32 v1, 0 13238; GFX9-NEXT: v_mov_b32_e32 v2, 0 13239; GFX9-NEXT: v_mov_b32_e32 v3, 0 13240; GFX9-NEXT: v_mov_b32_e32 v4, 0 13241; GFX9-NEXT: v_mov_b32_e32 v5, 1 13242; GFX9-NEXT: v_mov_b32_e32 v6, 1 13243; GFX9-NEXT: v_mov_b32_e32 v7, 1 13244; GFX9-NEXT: v_mov_b32_e32 v8, 1 13245; GFX9-NEXT: v_mov_b32_e32 v9, 1 13246; GFX9-NEXT: v_mov_b32_e32 v10, 2 13247; GFX9-NEXT: v_mov_b32_e32 v11, 2 13248; GFX9-NEXT: v_mov_b32_e32 v12, 2 13249; GFX9-NEXT: v_mov_b32_e32 v13, 2 13250; GFX9-NEXT: v_mov_b32_e32 v14, 2 13251; GFX9-NEXT: v_mov_b32_e32 v15, 3 13252; GFX9-NEXT: v_mov_b32_e32 v16, 3 13253; GFX9-NEXT: v_mov_b32_e32 v17, 3 13254; GFX9-NEXT: v_mov_b32_e32 v18, 3 13255; GFX9-NEXT: v_mov_b32_e32 v19, 3 13256; GFX9-NEXT: v_mov_b32_e32 v20, 4 13257; GFX9-NEXT: v_mov_b32_e32 v21, 4 13258; GFX9-NEXT: v_mov_b32_e32 v22, 4 13259; GFX9-NEXT: v_mov_b32_e32 v23, 4 13260; GFX9-NEXT: v_mov_b32_e32 v24, 4 13261; GFX9-NEXT: v_mov_b32_e32 v25, 5 13262; GFX9-NEXT: v_mov_b32_e32 v26, 5 13263; GFX9-NEXT: v_mov_b32_e32 v27, 5 13264; GFX9-NEXT: v_mov_b32_e32 v28, 5 13265; GFX9-NEXT: v_mov_b32_e32 v29, 5 13266; GFX9-NEXT: v_mov_b32_e32 v30, 6 13267; GFX9-NEXT: v_mov_b32_e32 v31, 7 13268; GFX9-NEXT: v_writelane_b32 v40, s31, 1 13269; GFX9-NEXT: s_getpc_b64 s[34:35] 13270; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 13271; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 13272; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 13273; GFX9-NEXT: v_readlane_b32 s31, v40, 1 13274; GFX9-NEXT: v_readlane_b32 s30, v40, 0 13275; GFX9-NEXT: s_addk_i32 s32, 0xfc00 13276; GFX9-NEXT: v_readlane_b32 s33, v40, 2 13277; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 13278; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 13279; GFX9-NEXT: s_mov_b64 exec, s[34:35] 13280; GFX9-NEXT: s_waitcnt vmcnt(0) 13281; GFX9-NEXT: s_setpc_b64 s[30:31] 13282; 13283; GFX10-LABEL: stack_8xv5i32: 13284; GFX10: ; %bb.0: ; %entry 13285; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 13286; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 13287; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13288; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 13289; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13290; GFX10-NEXT: s_mov_b32 exec_lo, s34 13291; GFX10-NEXT: v_mov_b32_e32 v0, 8 13292; GFX10-NEXT: v_mov_b32_e32 v1, 9 13293; GFX10-NEXT: v_mov_b32_e32 v2, 10 13294; GFX10-NEXT: v_writelane_b32 v40, s33, 2 13295; GFX10-NEXT: s_mov_b32 s33, s32 13296; GFX10-NEXT: s_addk_i32 s32, 0x200 13297; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 13298; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 13299; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 13300; GFX10-NEXT: v_mov_b32_e32 v0, 11 13301; GFX10-NEXT: v_mov_b32_e32 v1, 12 13302; GFX10-NEXT: v_mov_b32_e32 v2, 13 13303; GFX10-NEXT: v_mov_b32_e32 v3, 14 13304; GFX10-NEXT: v_mov_b32_e32 v4, 15 13305; GFX10-NEXT: v_writelane_b32 v40, s30, 0 13306; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 13307; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 13308; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 13309; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 13310; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 13311; GFX10-NEXT: v_mov_b32_e32 v0, 0 13312; GFX10-NEXT: v_mov_b32_e32 v1, 0 13313; GFX10-NEXT: v_mov_b32_e32 v2, 0 13314; GFX10-NEXT: v_mov_b32_e32 v3, 0 13315; GFX10-NEXT: v_mov_b32_e32 v4, 0 13316; GFX10-NEXT: v_mov_b32_e32 v5, 1 13317; GFX10-NEXT: v_mov_b32_e32 v6, 1 13318; GFX10-NEXT: v_mov_b32_e32 v7, 1 13319; GFX10-NEXT: v_mov_b32_e32 v8, 1 13320; GFX10-NEXT: v_mov_b32_e32 v9, 1 13321; GFX10-NEXT: v_mov_b32_e32 v10, 2 13322; GFX10-NEXT: v_mov_b32_e32 v11, 2 13323; GFX10-NEXT: v_mov_b32_e32 v12, 2 13324; GFX10-NEXT: v_mov_b32_e32 v13, 2 13325; GFX10-NEXT: v_mov_b32_e32 v14, 2 13326; GFX10-NEXT: v_mov_b32_e32 v15, 3 13327; GFX10-NEXT: v_mov_b32_e32 v16, 3 13328; 
GFX10-NEXT: v_mov_b32_e32 v17, 3 13329; GFX10-NEXT: v_mov_b32_e32 v18, 3 13330; GFX10-NEXT: v_mov_b32_e32 v19, 3 13331; GFX10-NEXT: v_mov_b32_e32 v20, 4 13332; GFX10-NEXT: v_mov_b32_e32 v21, 4 13333; GFX10-NEXT: v_mov_b32_e32 v22, 4 13334; GFX10-NEXT: v_mov_b32_e32 v23, 4 13335; GFX10-NEXT: v_mov_b32_e32 v24, 4 13336; GFX10-NEXT: v_mov_b32_e32 v25, 5 13337; GFX10-NEXT: v_mov_b32_e32 v26, 5 13338; GFX10-NEXT: v_mov_b32_e32 v27, 5 13339; GFX10-NEXT: v_mov_b32_e32 v28, 5 13340; GFX10-NEXT: v_mov_b32_e32 v29, 5 13341; GFX10-NEXT: v_mov_b32_e32 v30, 6 13342; GFX10-NEXT: v_mov_b32_e32 v31, 7 13343; GFX10-NEXT: v_writelane_b32 v40, s31, 1 13344; GFX10-NEXT: s_getpc_b64 s[34:35] 13345; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 13346; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 13347; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 13348; GFX10-NEXT: v_readlane_b32 s31, v40, 1 13349; GFX10-NEXT: v_readlane_b32 s30, v40, 0 13350; GFX10-NEXT: s_addk_i32 s32, 0xfe00 13351; GFX10-NEXT: v_readlane_b32 s33, v40, 2 13352; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13353; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 13354; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13355; GFX10-NEXT: s_mov_b32 exec_lo, s34 13356; GFX10-NEXT: s_waitcnt vmcnt(0) 13357; GFX10-NEXT: s_setpc_b64 s[30:31] 13358; 13359; GFX11-LABEL: stack_8xv5i32: 13360; GFX11: ; %bb.0: ; %entry 13361; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13362; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 13363; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13364; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 13365; GFX11-NEXT: s_mov_b32 exec_lo, s0 13366; GFX11-NEXT: v_writelane_b32 v40, s33, 2 13367; GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13 13368; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15 13369; GFX11-NEXT: v_dual_mov_b32 v4, 8 :: v_dual_mov_b32 v5, 9 13370; GFX11-NEXT: v_dual_mov_b32 v6, 10 :: v_dual_mov_b32 v7, 
11 13371; GFX11-NEXT: s_mov_b32 s33, s32 13372; GFX11-NEXT: s_add_i32 s32, s32, 16 13373; GFX11-NEXT: v_writelane_b32 v40, s30, 0 13374; GFX11-NEXT: s_clause 0x1 13375; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 13376; GFX11-NEXT: scratch_store_b128 off, v[4:7], s32 13377; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 13378; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 13379; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1 13380; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v7, 1 13381; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v9, 1 13382; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v11, 2 13383; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v13, 2 13384; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v15, 3 13385; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v17, 3 13386; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v19, 3 13387; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v21, 4 13388; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v23, 4 13389; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v25, 5 13390; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v27, 5 13391; GFX11-NEXT: v_dual_mov_b32 v28, 5 :: v_dual_mov_b32 v29, 5 13392; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7 13393; GFX11-NEXT: v_writelane_b32 v40, s31, 1 13394; GFX11-NEXT: s_getpc_b64 s[0:1] 13395; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 13396; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 13397; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 13398; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 13399; GFX11-NEXT: v_readlane_b32 s31, v40, 1 13400; GFX11-NEXT: v_readlane_b32 s30, v40, 0 13401; GFX11-NEXT: s_add_i32 s32, s32, -16 13402; GFX11-NEXT: v_readlane_b32 s33, v40, 2 13403; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13404; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 13405; GFX11-NEXT: s_mov_b32 
exec_lo, s0 13406; GFX11-NEXT: s_waitcnt vmcnt(0) 13407; GFX11-NEXT: s_setpc_b64 s[30:31] 13408; 13409; GFX10-SCRATCH-LABEL: stack_8xv5i32: 13410; GFX10-SCRATCH: ; %bb.0: ; %entry 13411; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13412; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 13413; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13414; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 13415; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13416; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13417; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 13418; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12 13419; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13 13420; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14 13421; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15 13422; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 8 13423; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 9 13424; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 10 13425; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 11 13426; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 13427; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 13428; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 13429; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 13430; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32 13431; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 13432; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 13433; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 13434; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0 13435; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 13436; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1 13437; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1 13438; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1 13439; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1 13440; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1 13441; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2 13442; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2 13443; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2 13444; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2 13445; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2 13446; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 3 13447; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 3 13448; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 3 13449; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 3 13450; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 3 13451; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4 13452; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4 13453; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4 13454; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4 13455; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4 13456; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 5 13457; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 5 13458; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 5 13459; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 5 13460; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 5 13461; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 6 13462; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7 13463; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 13464; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 13465; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 13466; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 13467; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 13468; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 13469; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 13470; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 13471; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 13472; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13473; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload 13474; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13475; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13476; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 13477; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 13478entry: 13479 call amdgpu_gfx void @external_void_func_8xv5i32( 13480 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>, 13481 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>, 13482 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>, 13483 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>, 13484 <5 x i32><i32 4, i32 4, i32 4, i32 4, 
i32 4>, 13485 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>, 13486 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>, 13487 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>) 13488 ret void 13489} 13490
; stack_8xv5f32 calls @external_void_func_8xv5f32 with eight constant
; <5 x float> arguments (40 scalar values in total). In the expected output
; below, the first 32 values are materialized in v0-v31 and the remaining
; eight (8.0 through 15.0, i.e. 0x41000000 through 0x41700000) are stored to
; the stack relative to s32 before the s_swappc_b64 call: as eight dword
; buffer stores at offsets 0-28 in the buffer-addressed outputs, and as two
; 128-bit scratch stores at offsets 0 and 16 in the scratch-addressed outputs.
; The check lines inside this function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
13491define amdgpu_gfx void @stack_8xv5f32() #0 { 13492; GFX9-LABEL: stack_8xv5f32: 13493; GFX9: ; %bb.0: ; %entry 13494; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13495; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 13496; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 13497; GFX9-NEXT: s_mov_b64 exec, s[34:35] 13498; GFX9-NEXT: v_writelane_b32 v40, s33, 2 13499; GFX9-NEXT: s_mov_b32 s33, s32 13500; GFX9-NEXT: s_addk_i32 s32, 0x400 13501; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 13502; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 13503; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000 13504; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 13505; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000 13506; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 13507; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 13508; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 13509; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 13510; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 13511; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 13512; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 13513; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 13514; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 13515; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 13516; GFX9-NEXT: v_writelane_b32 v40, s30, 0 13517; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 13518; GFX9-NEXT: v_mov_b32_e32 v0, 0 13519; GFX9-NEXT: v_mov_b32_e32 v1, 0 13520; GFX9-NEXT: v_mov_b32_e32 v2, 0 13521; GFX9-NEXT: v_mov_b32_e32 v3, 0 13522; GFX9-NEXT: v_mov_b32_e32 v4, 0 13523; GFX9-NEXT: v_mov_b32_e32 v5, 1.0 13524; GFX9-NEXT: v_mov_b32_e32 v6, 1.0 13525; GFX9-NEXT: v_mov_b32_e32 v7, 1.0 13526; GFX9-NEXT: v_mov_b32_e32 v8, 1.0 13527; GFX9-NEXT: 
v_mov_b32_e32 v9, 1.0 13528; GFX9-NEXT: v_mov_b32_e32 v10, 2.0 13529; GFX9-NEXT: v_mov_b32_e32 v11, 2.0 13530; GFX9-NEXT: v_mov_b32_e32 v12, 2.0 13531; GFX9-NEXT: v_mov_b32_e32 v13, 2.0 13532; GFX9-NEXT: v_mov_b32_e32 v14, 2.0 13533; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000 13534; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000 13535; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000 13536; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000 13537; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000 13538; GFX9-NEXT: v_mov_b32_e32 v20, 4.0 13539; GFX9-NEXT: v_mov_b32_e32 v21, 4.0 13540; GFX9-NEXT: v_mov_b32_e32 v22, 4.0 13541; GFX9-NEXT: v_mov_b32_e32 v23, 4.0 13542; GFX9-NEXT: v_mov_b32_e32 v24, 4.0 13543; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000 13544; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000 13545; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000 13546; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000 13547; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 13548; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 13549; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 13550; GFX9-NEXT: v_writelane_b32 v40, s31, 1 13551; GFX9-NEXT: s_getpc_b64 s[34:35] 13552; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 13553; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 13554; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] 13555; GFX9-NEXT: v_readlane_b32 s31, v40, 1 13556; GFX9-NEXT: v_readlane_b32 s30, v40, 0 13557; GFX9-NEXT: s_addk_i32 s32, 0xfc00 13558; GFX9-NEXT: v_readlane_b32 s33, v40, 2 13559; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 13560; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 13561; GFX9-NEXT: s_mov_b64 exec, s[34:35] 13562; GFX9-NEXT: s_waitcnt vmcnt(0) 13563; GFX9-NEXT: s_setpc_b64 s[30:31] 13564; 13565; GFX10-LABEL: stack_8xv5f32: 13566; GFX10: ; %bb.0: ; %entry 13567; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13568; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 13569; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13570; GFX10-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Spill 13571; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13572; GFX10-NEXT: s_mov_b32 exec_lo, s34 13573; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 13574; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000 13575; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 13576; GFX10-NEXT: v_writelane_b32 v40, s33, 2 13577; GFX10-NEXT: s_mov_b32 s33, s32 13578; GFX10-NEXT: s_addk_i32 s32, 0x200 13579; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 13580; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 13581; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 13582; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000 13583; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000 13584; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000 13585; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 13586; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 13587; GFX10-NEXT: v_writelane_b32 v40, s30, 0 13588; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 13589; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 13590; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 13591; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 13592; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 13593; GFX10-NEXT: v_mov_b32_e32 v0, 0 13594; GFX10-NEXT: v_mov_b32_e32 v1, 0 13595; GFX10-NEXT: v_mov_b32_e32 v2, 0 13596; GFX10-NEXT: v_mov_b32_e32 v3, 0 13597; GFX10-NEXT: v_mov_b32_e32 v4, 0 13598; GFX10-NEXT: v_mov_b32_e32 v5, 1.0 13599; GFX10-NEXT: v_mov_b32_e32 v6, 1.0 13600; GFX10-NEXT: v_mov_b32_e32 v7, 1.0 13601; GFX10-NEXT: v_mov_b32_e32 v8, 1.0 13602; GFX10-NEXT: v_mov_b32_e32 v9, 1.0 13603; GFX10-NEXT: v_mov_b32_e32 v10, 2.0 13604; GFX10-NEXT: v_mov_b32_e32 v11, 2.0 13605; GFX10-NEXT: v_mov_b32_e32 v12, 2.0 13606; GFX10-NEXT: v_mov_b32_e32 v13, 2.0 13607; GFX10-NEXT: v_mov_b32_e32 v14, 2.0 13608; GFX10-NEXT: v_mov_b32_e32 v15, 0x40400000 13609; GFX10-NEXT: v_mov_b32_e32 v16, 0x40400000 13610; GFX10-NEXT: v_mov_b32_e32 v17, 0x40400000 13611; GFX10-NEXT: 
v_mov_b32_e32 v18, 0x40400000 13612; GFX10-NEXT: v_mov_b32_e32 v19, 0x40400000 13613; GFX10-NEXT: v_mov_b32_e32 v20, 4.0 13614; GFX10-NEXT: v_mov_b32_e32 v21, 4.0 13615; GFX10-NEXT: v_mov_b32_e32 v22, 4.0 13616; GFX10-NEXT: v_mov_b32_e32 v23, 4.0 13617; GFX10-NEXT: v_mov_b32_e32 v24, 4.0 13618; GFX10-NEXT: v_mov_b32_e32 v25, 0x40a00000 13619; GFX10-NEXT: v_mov_b32_e32 v26, 0x40a00000 13620; GFX10-NEXT: v_mov_b32_e32 v27, 0x40a00000 13621; GFX10-NEXT: v_mov_b32_e32 v28, 0x40a00000 13622; GFX10-NEXT: v_mov_b32_e32 v29, 0x40a00000 13623; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 13624; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 13625; GFX10-NEXT: v_writelane_b32 v40, s31, 1 13626; GFX10-NEXT: s_getpc_b64 s[34:35] 13627; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 13628; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 13629; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 13630; GFX10-NEXT: v_readlane_b32 s31, v40, 1 13631; GFX10-NEXT: v_readlane_b32 s30, v40, 0 13632; GFX10-NEXT: s_addk_i32 s32, 0xfe00 13633; GFX10-NEXT: v_readlane_b32 s33, v40, 2 13634; GFX10-NEXT: s_or_saveexec_b32 s34, -1 13635; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 13636; GFX10-NEXT: s_waitcnt_depctr 0xffe3 13637; GFX10-NEXT: s_mov_b32 exec_lo, s34 13638; GFX10-NEXT: s_waitcnt vmcnt(0) 13639; GFX10-NEXT: s_setpc_b64 s[30:31] 13640; 13641; GFX11-LABEL: stack_8xv5f32: 13642; GFX11: ; %bb.0: ; %entry 13643; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13644; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 13645; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13646; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill 13647; GFX11-NEXT: s_mov_b32 exec_lo, s0 13648; GFX11-NEXT: v_writelane_b32 v40, s33, 2 13649; GFX11-NEXT: v_mov_b32_e32 v0, 0x41400000 13650; GFX11-NEXT: v_mov_b32_e32 v1, 0x41500000 13651; GFX11-NEXT: v_mov_b32_e32 v2, 0x41600000 13652; GFX11-NEXT: v_mov_b32_e32 v3, 0x41700000 13653; GFX11-NEXT: 
v_mov_b32_e32 v4, 0x41000000 13654; GFX11-NEXT: v_mov_b32_e32 v5, 0x41100000 13655; GFX11-NEXT: v_mov_b32_e32 v6, 0x41200000 13656; GFX11-NEXT: v_mov_b32_e32 v7, 0x41300000 13657; GFX11-NEXT: s_mov_b32 s33, s32 13658; GFX11-NEXT: s_add_i32 s32, s32, 16 13659; GFX11-NEXT: v_writelane_b32 v40, s30, 0 13660; GFX11-NEXT: s_clause 0x1 13661; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 13662; GFX11-NEXT: scratch_store_b128 off, v[4:7], s32 13663; GFX11-NEXT: v_mov_b32_e32 v6, 1.0 13664; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 13665; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 13666; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0 13667; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0 13668; GFX11-NEXT: v_dual_mov_b32 v9, 1.0 :: v_dual_mov_b32 v10, 2.0 13669; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v12, 2.0 13670; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v14, 2.0 13671; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000 13672; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000 13673; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v20, 4.0 13674; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v22, 4.0 13675; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v24, 4.0 13676; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000 13677; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000 13678; GFX11-NEXT: v_mov_b32_e32 v29, 0x40a00000 13679; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000 13680; GFX11-NEXT: v_mov_b32_e32 v31, 0x40e00000 13681; GFX11-NEXT: v_writelane_b32 v40, s31, 1 13682; GFX11-NEXT: s_getpc_b64 s[0:1] 13683; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 13684; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 13685; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 13686; GFX11-NEXT: s_swappc_b64 s[30:31], 
s[0:1] 13687; GFX11-NEXT: v_readlane_b32 s31, v40, 1 13688; GFX11-NEXT: v_readlane_b32 s30, v40, 0 13689; GFX11-NEXT: s_add_i32 s32, s32, -16 13690; GFX11-NEXT: v_readlane_b32 s33, v40, 2 13691; GFX11-NEXT: s_or_saveexec_b32 s0, -1 13692; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload 13693; GFX11-NEXT: s_mov_b32 exec_lo, s0 13694; GFX11-NEXT: s_waitcnt vmcnt(0) 13695; GFX11-NEXT: s_setpc_b64 s[30:31] 13696; 13697; GFX10-SCRATCH-LABEL: stack_8xv5f32: 13698; GFX10-SCRATCH: ; %bb.0: ; %entry 13699; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13700; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 13701; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13702; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill 13703; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13704; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13705; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 13706; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41400000 13707; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x41500000 13708; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41600000 13709; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x41700000 13710; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0x41000000 13711; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41100000 13712; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41200000 13713; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41300000 13714; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 13715; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 13716; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 13717; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 13718; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32 13719; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 13720; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 13721; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 13722; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0 13723; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 13724; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1.0 13725; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 1.0 
13726; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 1.0 13727; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 1.0 13728; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 1.0 13729; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 2.0 13730; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v11, 2.0 13731; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v12, 2.0 13732; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v13, 2.0 13733; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v14, 2.0 13734; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v15, 0x40400000 13735; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0x40400000 13736; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v17, 0x40400000 13737; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, 0x40400000 13738; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, 0x40400000 13739; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v20, 4.0 13740; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v21, 4.0 13741; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v22, 4.0 13742; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v23, 4.0 13743; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v24, 4.0 13744; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v25, 0x40a00000 13745; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v26, 0x40a00000 13746; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v27, 0x40a00000 13747; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v28, 0x40a00000 13748; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 0x40a00000 13749; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 0x40c00000 13750; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000 13751; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 13752; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] 13753; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 13754; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 13755; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 13756; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 13757; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 13758; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 13759; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 13760; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 13761; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded 
Reload 13762; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 13763; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 13764; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 13765; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] 13766entry: 13767 call amdgpu_gfx void @external_void_func_8xv5f32( 13768 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, 13769 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, 13770 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>, 13771 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>, 13772 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>, 13773 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>, 13774 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>, 13775 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>) 13776 ret void 13777} 13778 13779declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0 13780declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0 13781declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, 13782 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 13783declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, 13784 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0 13785declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>, 13786 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0 13787declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>, 13788 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0 13789attributes #0 = { nounwind } 13790attributes #1 = { nounwind noinline } 13791