; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s

; Test using saddr addressing mode of global_*store_* flat instructions.
;
; Every function below forms its store address as
;   base pointer + zext(i32 offset) [+ constant]
; and the checks expect one global_store_* whose operands are
; "offset VGPR, data VGPR(s), base SGPR pair [offset:imm]".
;
; The gfx900 and gfx1010 runs share the common GCN prefix wherever their
; expected output is identical. The gfx1100 run has its own prefix: its checks
; use different mnemonics (global_store_b8, global_load_b32, ds_load_b64, ...)
; and expect s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.

; The 32-bit offset is loaded from memory, zero-extended, and added to the
; inreg base pointer (s[2:3] in the checks).
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  store i8 %data, i8 addrspace(1)* %gep0
  ret void
}

; Maximum positive offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3] offset:2047
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3] offset:2047
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 2047
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; Maximum negative offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3] offset:-2048
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3] offset:-2048
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %voffset = load i32, i32 addrspace(1)* %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -2048
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Uniformity edge cases
; --------------------------------------------------------------------------------

@ptr.in.lds = internal addrspace(3) global i8 addrspace(1)* undef

; Base pointer is uniform, but also in VGPRs
;
; The base is loaded from LDS (ds_read_b64 / ds_load_b64 in the checks) and the
; checks expect v_readfirstlane_b32 to copy it into s[0:1] before the store.
; The gfx900 checks additionally expect "s_nop 4" between the readfirstlane
; and the store, which is why this test uses per-target prefixes.
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_read_b64 v[2:3], v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v2
; GFX9-NEXT:    v_readfirstlane_b32 s1, v3
; GFX9-NEXT:    s_nop 4
; GFX9-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    ds_read_b64 v[2:3], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
; GFX10-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_load_b64 v[2:3], v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
; GFX11-NEXT:    global_store_b8 v0, v1, s[0:1]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  store i8 %data, i8 addrspace(1)* %gep0
  ret void
}

; Base pointer is uniform, but also in VGPRs, with imm offset
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_read_b64 v[2:3], v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v2
; GFX9-NEXT:    v_readfirstlane_b32 s1, v3
; GFX9-NEXT:    s_nop 4
; GFX9-NEXT:    global_store_byte v0, v1, s[0:1] offset:-120
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    ds_read_b64 v[2:3], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
; GFX10-NEXT:    global_store_byte v0, v1, s[0:1] offset:-120
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_load_b64 v[2:3], v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
; GFX11-NEXT:    global_store_b8 v0, v1, s[0:1] offset:-120
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -120
  store i8 %data, i8 addrspace(1)* %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Stress various type stores
; --------------------------------------------------------------------------------
;
; Each stored type appears twice: once with no constant offset, and once with a
; constant -128 byte offset that the checks expect to appear as "offset:-128"
; on the store instruction.

define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to half addrspace(1)*
  store half %data, half addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to half addrspace(1)*
  store half %data, half addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to float addrspace(1)*
  store float %data, float addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to float addrspace(1)*
  store float %data, float addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to double addrspace(1)*
  store double %data, double addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to double addrspace(1)*
  store double %data, double addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <6 x i16> addrspace(1)*
  store <6 x i16> %data, <6 x i16> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x i16> addrspace(1)*
  store <6 x i16> %data, <6 x i16> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <6 x half> addrspace(1)*
  store <6 x half> %data, <6 x half> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x half> addrspace(1)*
  store <6 x half> %data, <6 x half> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i32> addrspace(1)*
  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep0.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i32> addrspace(1)*
  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep1.cast
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) {
; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr:
;
GFX11: ; %bb.0: 806; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 807; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 808; GFX11-NEXT: s_endpgm 809 %zext.offset = zext i32 %voffset to i64 810 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 811 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x float> addrspace(1)* 812 store <4 x float> %data, <4 x float> addrspace(1)* %gep0.cast 813 ret void 814} 815 816define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) { 817; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128: 818; GCN: ; %bb.0: 819; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 820; GCN-NEXT: s_endpgm 821; 822; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128: 823; GFX11: ; %bb.0: 824; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 825; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 826; GFX11-NEXT: s_endpgm 827 %zext.offset = zext i32 %voffset to i64 828 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 829 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 830 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x float> addrspace(1)* 831 store <4 x float> %data, <4 x float> addrspace(1)* %gep1.cast 832 ret void 833} 834 835define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) { 836; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr: 837; GCN: ; %bb.0: 838; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 839; GCN-NEXT: s_endpgm 840; 841; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr: 842; GFX11: ; %bb.0: 843; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 844; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 845; GFX11-NEXT: s_endpgm 846 %zext.offset = zext i32 %voffset to i64 847 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 848 %gep0.cast = bitcast 
i8 addrspace(1)* %gep0 to <2 x i64> addrspace(1)* 849 store <2 x i64> %data, <2 x i64> addrspace(1)* %gep0.cast 850 ret void 851} 852 853define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) { 854; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128: 855; GCN: ; %bb.0: 856; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 857; GCN-NEXT: s_endpgm 858; 859; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128: 860; GFX11: ; %bb.0: 861; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 862; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 863; GFX11-NEXT: s_endpgm 864 %zext.offset = zext i32 %voffset to i64 865 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 866 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 867 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i64> addrspace(1)* 868 store <2 x i64> %data, <2 x i64> addrspace(1)* %gep1.cast 869 ret void 870} 871 872define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) { 873; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr: 874; GCN: ; %bb.0: 875; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 876; GCN-NEXT: s_endpgm 877; 878; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr: 879; GFX11: ; %bb.0: 880; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 881; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 882; GFX11-NEXT: s_endpgm 883 %zext.offset = zext i32 %voffset to i64 884 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 885 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x double> addrspace(1)* 886 store <2 x double> %data, <2 x double> addrspace(1)* %gep0.cast 887 ret void 888} 889 890define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) { 891; GCN-LABEL: 
global_store_saddr_v2f64_zext_vgpr_offset_neg128: 892; GCN: ; %bb.0: 893; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 894; GCN-NEXT: s_endpgm 895; 896; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128: 897; GFX11: ; %bb.0: 898; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 899; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 900; GFX11-NEXT: s_endpgm 901 %zext.offset = zext i32 %voffset to i64 902 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 903 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 904 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x double> addrspace(1)* 905 store <2 x double> %data, <2 x double> addrspace(1)* %gep1.cast 906 ret void 907} 908 909define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) { 910; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr: 911; GCN: ; %bb.0: 912; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 913; GCN-NEXT: s_endpgm 914; 915; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr: 916; GFX11: ; %bb.0: 917; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 918; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 919; GFX11-NEXT: s_endpgm 920 %zext.offset = zext i32 %voffset to i64 921 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 922 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x i16> addrspace(1)* 923 store <8 x i16> %data, <8 x i16> addrspace(1)* %gep0.cast 924 ret void 925} 926 927define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) { 928; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128: 929; GCN: ; %bb.0: 930; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 931; GCN-NEXT: s_endpgm 932; 933; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128: 934; GFX11: ; %bb.0: 935; GFX11-NEXT: global_store_b128 v0, v[1:4], 
s[2:3] offset:-128 936; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 937; GFX11-NEXT: s_endpgm 938 %zext.offset = zext i32 %voffset to i64 939 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 940 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 941 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <8 x i16> addrspace(1)* 942 store <8 x i16> %data, <8 x i16> addrspace(1)* %gep1.cast 943 ret void 944} 945 946define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) { 947; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr: 948; GCN: ; %bb.0: 949; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 950; GCN-NEXT: s_endpgm 951; 952; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr: 953; GFX11: ; %bb.0: 954; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 955; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 956; GFX11-NEXT: s_endpgm 957 %zext.offset = zext i32 %voffset to i64 958 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 959 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x half> addrspace(1)* 960 store <8 x half> %data, <8 x half> addrspace(1)* %gep0.cast 961 ret void 962} 963 964define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) { 965; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128: 966; GCN: ; %bb.0: 967; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 968; GCN-NEXT: s_endpgm 969; 970; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128: 971; GFX11: ; %bb.0: 972; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 973; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 974; GFX11-NEXT: s_endpgm 975 %zext.offset = zext i32 %voffset to i64 976 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 977 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 978 %gep1.cast 
= bitcast i8 addrspace(1)* %gep1 to <8 x half> addrspace(1)* 979 store <8 x half> %data, <8 x half> addrspace(1)* %gep1.cast 980 ret void 981} 982 983define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) { 984; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr: 985; GCN: ; %bb.0: 986; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 987; GCN-NEXT: s_endpgm 988; 989; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr: 990; GFX11: ; %bb.0: 991; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 992; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 993; GFX11-NEXT: s_endpgm 994 %zext.offset = zext i32 %voffset to i64 995 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 996 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i8 addrspace(1)*> addrspace(1)* 997 store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep0.cast 998 ret void 999} 1000 1001define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) { 1002; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128: 1003; GCN: ; %bb.0: 1004; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 1005; GCN-NEXT: s_endpgm 1006; 1007; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128: 1008; GFX11: ; %bb.0: 1009; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 1010; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1011; GFX11-NEXT: s_endpgm 1012 %zext.offset = zext i32 %voffset to i64 1013 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 1014 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 1015 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i8 addrspace(1)*> addrspace(1)* 1016 store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep1.cast 1017 ret void 1018} 1019 1020define amdgpu_ps void 
@global_store_saddr_v4p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) { 1021; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr: 1022; GCN: ; %bb.0: 1023; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] 1024; GCN-NEXT: s_endpgm 1025; 1026; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr: 1027; GFX11: ; %bb.0: 1028; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] 1029; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1030; GFX11-NEXT: s_endpgm 1031 %zext.offset = zext i32 %voffset to i64 1032 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 1033 %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i8 addrspace(3)*> addrspace(1)* 1034 store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep0.cast 1035 ret void 1036} 1037 1038define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) { 1039; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128: 1040; GCN: ; %bb.0: 1041; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 1042; GCN-NEXT: s_endpgm 1043; 1044; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128: 1045; GFX11: ; %bb.0: 1046; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 1047; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1048; GFX11-NEXT: s_endpgm 1049 %zext.offset = zext i32 %voffset to i64 1050 %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset 1051 %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 1052 %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i8 addrspace(3)*> addrspace(1)* 1053 store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep1.cast 1054 ret void 1055} 1056 1057; -------------------------------------------------------------------------------- 1058; Atomic store 1059; -------------------------------------------------------------------------------- 1060 
; seq_cst atomic i32 store to %sbase + zext(%voffset). Every target's checks
; expect s_waitcnt vmcnt(0) lgkmcnt(0) before the saddr-form store; the
; gfx1010 and gfx1100 checks additionally expect s_waitcnt_vscnt null, 0x0.
define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  store atomic i32 %data, i32 addrspace(1)* %gep0.cast seq_cst, align 4
  ret void
}

; seq_cst atomic i32 store with an extra constant -128 byte GEP. The checks
; expect the same wait sequence and offset:-128 on the store.
define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  store atomic i32 %data, i32 addrspace(1)* %gep1.cast seq_cst, align 4
  ret void
}

; seq_cst atomic i64 store to %sbase + zext(%voffset). The checks expect the
; same wait sequence as the i32 case followed by a 64-bit saddr store
; (global_store_dwordx2, or global_store_b64 on gfx1100).
define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i64 addrspace(1)*
  store atomic i64 %data, i64 addrspace(1)* %gep0.cast seq_cst, align 8
  ret void
}

; seq_cst atomic i64 store with an extra constant -128 byte GEP. The checks
; expect the same wait sequence and offset:-128 on the 64-bit store.
define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i64 addrspace(1)*
  store atomic i64 %data, i64 addrspace(1)* %gep1.cast seq_cst, align 8
  ret void
}

; --------------------------------------------------------------------------------
; D16 HI store (hi 16)
; --------------------------------------------------------------------------------

; Stores element 1 of a <2 x i16> argument as an i16. The checks expect a
; single d16_hi short store (global_store_short_d16_hi, or
; global_store_d16_hi_b16 on gfx1100) taking v1 directly, with no separate
; shift or extract instruction.
define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short_d16_hi v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_d16_hi_b16 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i16 addrspace(1)*
  %data.hi = extractelement <2 x i16> %data, i32 1
  store i16 %data.hi, i16 addrspace(1)* %gep0.cast
  ret void
}

; Element-1 i16 store with an extra constant -128 byte GEP. The checks expect
; offset:-128 on the d16_hi short store.
define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short_d16_hi v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i16 addrspace(1)*
  %data.hi = extractelement <2 x i16> %data, i32 1
  store i16 %data.hi, i16 addrspace(1)* %gep1.cast
  ret void
}

; Stores element 1 of a <2 x i16> argument truncated to i8. The checks expect
; a single d16_hi byte store (global_store_byte_d16_hi, or
; global_store_d16_hi_b8 on gfx1100) taking v1 directly.
define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_byte_d16_hi v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_d16_hi_b8 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, i8 addrspace(1)* %gep0
  ret void
}

; Truncated element-1 byte store with an extra constant -128 byte GEP. The
; checks expect offset:-128 on the d16_hi byte store.
define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_byte_d16_hi v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %data.hi = extractelement <2 x i16> %data, i32 1
  %data.hi.trunc = trunc i16 %data.hi to i8
  store i8 %data.hi.trunc, i8 addrspace(1)* %gep1
  ret void
}