1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG 3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL 4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG 5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL 6 7; Test flat scratch SVS addressing mode with various combinations of alignment 8; of soffset, voffset and inst_offset. 9 10declare i32 @llvm.amdgcn.workitem.id.x() 11 12define amdgpu_kernel void @soff1_voff1(i32 %soff) { 13; GFX940-SDAG-LABEL: soff1_voff1: 14; GFX940-SDAG: ; %bb.0: ; %bb 15; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 16; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 17; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 18; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 19; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 20; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 21; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 22; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 23; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 24; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 25; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 26; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 27; GFX940-SDAG-NEXT: s_endpgm 28; 29; GFX940-GISEL-LABEL: soff1_voff1: 30; GFX940-GISEL: ; %bb.0: ; %bb 31; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 32; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 33; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 34; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 35; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 36; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 37; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 38; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 39; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 40; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 41; GFX940-GISEL-NEXT: scratch_store_byte 
v0, v3, off offset:2 sc0 sc1 42; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 43; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 44; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 45; GFX940-GISEL-NEXT: s_endpgm 46; 47; GFX11-SDAG-LABEL: soff1_voff1: 48; GFX11-SDAG: ; %bb.0: ; %bb 49; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 50; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 51; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 52; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 53; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 54; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 55; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 56; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 57; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 58; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 59; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 60; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 61; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 62; GFX11-SDAG-NEXT: s_endpgm 63; 64; GFX11-GISEL-LABEL: soff1_voff1: 65; GFX11-GISEL: ; %bb.0: ; %bb 66; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 67; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 68; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 69; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 70; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 71; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 72; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 73; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 74; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 75; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 76; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 77; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 78; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 79; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 80; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 81; GFX11-GISEL-NEXT: s_endpgm 82bb: 83 %soff1 = mul i32 %soff, 1 84 %a = alloca i8, i32 64, align 4, addrspace(5) 85 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 86 %voff = call i32 
@llvm.amdgcn.workitem.id.x() 87 %voff1 = mul i32 %voff, 1 88 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 89 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 90 store volatile i8 1, i8 addrspace(5)* %p1 91 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 92 store volatile i8 2, i8 addrspace(5)* %p2 93 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 94 store volatile i8 4, i8 addrspace(5)* %p4 95 ret void 96} 97 98define amdgpu_kernel void @soff1_voff2(i32 %soff) { 99; GFX940-SDAG-LABEL: soff1_voff2: 100; GFX940-SDAG: ; %bb.0: ; %bb 101; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 102; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 103; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 104; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 105; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 106; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 107; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 108; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 109; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 110; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 111; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 112; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 113; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 114; GFX940-SDAG-NEXT: s_endpgm 115; 116; GFX940-GISEL-LABEL: soff1_voff2: 117; GFX940-GISEL: ; %bb.0: ; %bb 118; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 119; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 120; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 121; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 122; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 123; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 124; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 125; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 126; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 127; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 128; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 129; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 130; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 131; 
GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 132; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 133; GFX940-GISEL-NEXT: s_endpgm 134; 135; GFX11-SDAG-LABEL: soff1_voff2: 136; GFX11-SDAG: ; %bb.0: ; %bb 137; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 138; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 139; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 140; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 141; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 142; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 143; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 144; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 145; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 146; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 147; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 148; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 149; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 150; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 151; GFX11-SDAG-NEXT: s_endpgm 152; 153; GFX11-GISEL-LABEL: soff1_voff2: 154; GFX11-GISEL: ; %bb.0: ; %bb 155; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 156; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 157; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 158; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 159; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 160; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 161; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 162; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 163; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 164; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 165; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 166; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 167; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 168; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 169; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 170; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 171; GFX11-GISEL-NEXT: s_endpgm 172bb: 173 %soff1 = mul i32 %soff, 1 174 %a = alloca i8, i32 64, align 4, addrspace(5) 175 %as = 
getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 176 %voff = call i32 @llvm.amdgcn.workitem.id.x() 177 %voff2 = mul i32 %voff, 2 178 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 179 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 180 store volatile i8 1, i8 addrspace(5)* %p1 181 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 182 store volatile i8 2, i8 addrspace(5)* %p2 183 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 184 store volatile i8 4, i8 addrspace(5)* %p4 185 ret void 186} 187 188define amdgpu_kernel void @soff1_voff4(i32 %soff) { 189; GFX940-SDAG-LABEL: soff1_voff4: 190; GFX940-SDAG: ; %bb.0: ; %bb 191; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 192; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 193; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 194; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 195; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 196; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 197; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 198; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 199; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 200; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 201; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 202; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 203; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 204; GFX940-SDAG-NEXT: s_endpgm 205; 206; GFX940-GISEL-LABEL: soff1_voff4: 207; GFX940-GISEL: ; %bb.0: ; %bb 208; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 209; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 210; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 211; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 212; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 213; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 214; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 215; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 216; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 217; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 218; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 219; 
GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 220; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 221; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 222; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 223; GFX940-GISEL-NEXT: s_endpgm 224; 225; GFX11-SDAG-LABEL: soff1_voff4: 226; GFX11-SDAG: ; %bb.0: ; %bb 227; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 228; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 229; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 230; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 231; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 232; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 233; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 234; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 235; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 236; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc 237; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 238; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc 239; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 240; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 241; GFX11-SDAG-NEXT: s_endpgm 242; 243; GFX11-GISEL-LABEL: soff1_voff4: 244; GFX11-GISEL: ; %bb.0: ; %bb 245; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 246; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 247; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 248; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 249; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 250; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 251; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 252; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 253; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 254; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 255; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 256; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 257; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 258; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 259; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 260; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 261; GFX11-GISEL-NEXT: s_endpgm 262bb: 263 %soff1 = 
mul i32 %soff, 1 264 %a = alloca i8, i32 64, align 4, addrspace(5) 265 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 266 %voff = call i32 @llvm.amdgcn.workitem.id.x() 267 %voff4 = mul i32 %voff, 4 268 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4 269 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 270 store volatile i8 1, i8 addrspace(5)* %p1 271 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 272 store volatile i8 2, i8 addrspace(5)* %p2 273 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 274 store volatile i8 4, i8 addrspace(5)* %p4 275 ret void 276} 277 278define amdgpu_kernel void @soff2_voff1(i32 %soff) { 279; GFX940-SDAG-LABEL: soff2_voff1: 280; GFX940-SDAG: ; %bb.0: ; %bb 281; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 282; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 283; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 284; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 285; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 286; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 287; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 288; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 289; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 290; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 291; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 292; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 293; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 294; GFX940-SDAG-NEXT: s_endpgm 295; 296; GFX940-GISEL-LABEL: soff2_voff1: 297; GFX940-GISEL: ; %bb.0: ; %bb 298; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 299; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 300; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 301; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 302; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 303; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 304; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 305; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 306; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 307; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 308; GFX940-GISEL-NEXT: 
s_waitcnt vmcnt(0) 309; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1 310; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 311; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 312; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 313; GFX940-GISEL-NEXT: s_endpgm 314; 315; GFX11-SDAG-LABEL: soff2_voff1: 316; GFX11-SDAG: ; %bb.0: ; %bb 317; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 318; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 319; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 320; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 321; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 322; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 323; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 324; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 325; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 326; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 327; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 328; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 329; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 330; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 331; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 332; GFX11-SDAG-NEXT: s_endpgm 333; 334; GFX11-GISEL-LABEL: soff2_voff1: 335; GFX11-GISEL: ; %bb.0: ; %bb 336; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 337; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 338; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 339; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 340; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 341; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 342; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 343; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 344; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 345; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 346; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 347; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 348; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 349; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 350; 
GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 351; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 352; GFX11-GISEL-NEXT: s_endpgm 353bb: 354 %soff2 = mul i32 %soff, 2 355 %a = alloca i8, i32 64, align 4, addrspace(5) 356 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 357 %voff = call i32 @llvm.amdgcn.workitem.id.x() 358 %voff1 = mul i32 %voff, 1 359 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 360 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 361 store volatile i8 1, i8 addrspace(5)* %p1 362 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 363 store volatile i8 2, i8 addrspace(5)* %p2 364 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 365 store volatile i8 4, i8 addrspace(5)* %p4 366 ret void 367} 368 369define amdgpu_kernel void @soff2_voff2(i32 %soff) { 370; GFX940-SDAG-LABEL: soff2_voff2: 371; GFX940-SDAG: ; %bb.0: ; %bb 372; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 373; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 374; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 375; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 376; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 377; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 378; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 379; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 380; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 381; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 382; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 383; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 384; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 385; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 386; GFX940-SDAG-NEXT: s_endpgm 387; 388; GFX940-GISEL-LABEL: soff2_voff2: 389; GFX940-GISEL: ; %bb.0: ; %bb 390; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 391; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 392; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 393; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 394; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 395; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 396; GFX940-GISEL-NEXT: 
v_add_u32_e32 v1, s0, v1 397; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 398; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 399; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 400; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 401; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 402; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 403; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 404; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 405; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 406; GFX940-GISEL-NEXT: s_endpgm 407; 408; GFX11-SDAG-LABEL: soff2_voff2: 409; GFX11-SDAG: ; %bb.0: ; %bb 410; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 411; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 412; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 413; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 414; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 415; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 416; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 417; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 418; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 419; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 420; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 421; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 422; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 423; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 424; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 425; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 426; GFX11-SDAG-NEXT: s_endpgm 427; 428; GFX11-GISEL-LABEL: soff2_voff2: 429; GFX11-GISEL: ; %bb.0: ; %bb 430; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 431; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 432; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 433; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 434; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 435; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 436; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 437; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 438; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, 
v0 439; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 440; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 441; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 442; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 443; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 444; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 445; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 446; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 447; GFX11-GISEL-NEXT: s_endpgm 448bb: 449 %soff2 = mul i32 %soff, 2 450 %a = alloca i8, i32 64, align 4, addrspace(5) 451 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 452 %voff = call i32 @llvm.amdgcn.workitem.id.x() 453 %voff2 = mul i32 %voff, 2 454 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 455 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 456 store volatile i8 1, i8 addrspace(5)* %p1 457 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 458 store volatile i8 2, i8 addrspace(5)* %p2 459 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 460 store volatile i8 4, i8 addrspace(5)* %p4 461 ret void 462} 463 464define amdgpu_kernel void @soff2_voff4(i32 %soff) { 465; GFX940-SDAG-LABEL: soff2_voff4: 466; GFX940-SDAG: ; %bb.0: ; %bb 467; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 468; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 469; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 470; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 471; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 472; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 473; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 474; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 475; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 476; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 477; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 478; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 479; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 480; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 481; GFX940-SDAG-NEXT: s_endpgm 482; 483; GFX940-GISEL-LABEL: soff2_voff4: 
484; GFX940-GISEL: ; %bb.0: ; %bb 485; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 486; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 487; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 488; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 489; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 490; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 491; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 492; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 493; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 494; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 495; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 496; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 497; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 498; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 499; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 500; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 501; GFX940-GISEL-NEXT: s_endpgm 502; 503; GFX11-SDAG-LABEL: soff2_voff4: 504; GFX11-SDAG: ; %bb.0: ; %bb 505; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 506; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 507; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 508; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 509; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 510; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 511; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 512; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 513; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 514; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 515; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 516; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc 517; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 518; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc 519; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 520; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 521; GFX11-SDAG-NEXT: s_endpgm 522; 523; GFX11-GISEL-LABEL: soff2_voff4: 524; GFX11-GISEL: ; %bb.0: ; %bb 525; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 526; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 527; GFX11-GISEL-NEXT: 
v_mov_b32_e32 v2, 1 528; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 529; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 530; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 531; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 532; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 533; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 534; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 535; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 536; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 537; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 538; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 539; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 540; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 541; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 542; GFX11-GISEL-NEXT: s_endpgm 543bb: 544 %soff2 = mul i32 %soff, 2 545 %a = alloca i8, i32 64, align 4, addrspace(5) 546 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 547 %voff = call i32 @llvm.amdgcn.workitem.id.x() 548 %voff4 = mul i32 %voff, 4 549 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4 550 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 551 store volatile i8 1, i8 addrspace(5)* %p1 552 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 553 store volatile i8 2, i8 addrspace(5)* %p2 554 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 555 store volatile i8 4, i8 addrspace(5)* %p4 556 ret void 557} 558 559define amdgpu_kernel void @soff4_voff1(i32 %soff) { 560; GFX940-SDAG-LABEL: soff4_voff1: 561; GFX940-SDAG: ; %bb.0: ; %bb 562; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 563; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 564; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 565; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 566; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 567; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 568; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 569; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 570; GFX940-SDAG-NEXT: scratch_store_byte v0, 
v2, s0 offset:2 sc0 sc1 571; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 572; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 573; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 574; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 575; GFX940-SDAG-NEXT: s_endpgm 576; 577; GFX940-GISEL-LABEL: soff4_voff1: 578; GFX940-GISEL: ; %bb.0: ; %bb 579; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 580; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 581; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 582; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 583; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 584; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 585; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 586; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 587; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 588; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 589; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 590; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1 591; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 592; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 593; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 594; GFX940-GISEL-NEXT: s_endpgm 595; 596; GFX11-SDAG-LABEL: soff4_voff1: 597; GFX11-SDAG: ; %bb.0: ; %bb 598; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 599; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 600; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 2 601; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4 602; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 603; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 604; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 605; GFX11-SDAG-NEXT: v_add3_u32 v2, 4, s0, v0 606; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 607; GFX11-SDAG-NEXT: scratch_store_b8 v2, v1, off offset:1 dlc 608; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 609; GFX11-SDAG-NEXT: scratch_store_b8 v2, v3, off offset:2 dlc 610; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 611; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc 612; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 613; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 
614; GFX11-SDAG-NEXT: s_endpgm 615; 616; GFX11-GISEL-LABEL: soff4_voff1: 617; GFX11-GISEL: ; %bb.0: ; %bb 618; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 619; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 620; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 621; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 622; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 623; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 624; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 625; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 626; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 627; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 628; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 629; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 630; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 631; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 632; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 633; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 634; GFX11-GISEL-NEXT: s_endpgm 635bb: 636 %soff4 = mul i32 %soff, 4 637 %a = alloca i8, i32 64, align 4, addrspace(5) 638 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4 639 %voff = call i32 @llvm.amdgcn.workitem.id.x() 640 %voff1 = mul i32 %voff, 1 641 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 642 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 643 store volatile i8 1, i8 addrspace(5)* %p1 644 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 645 store volatile i8 2, i8 addrspace(5)* %p2 646 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 647 store volatile i8 4, i8 addrspace(5)* %p4 648 ret void 649} 650 651define amdgpu_kernel void @soff4_voff2(i32 %soff) { 652; GFX940-SDAG-LABEL: soff4_voff2: 653; GFX940-SDAG: ; %bb.0: ; %bb 654; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 655; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 656; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 657; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 658; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 659; 
GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 660; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 661; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 662; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 663; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 664; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 665; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 666; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 667; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 668; GFX940-SDAG-NEXT: s_endpgm 669; 670; GFX940-GISEL-LABEL: soff4_voff2: 671; GFX940-GISEL: ; %bb.0: ; %bb 672; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 673; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 674; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 675; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 676; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 677; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 678; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 679; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 680; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 681; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 682; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 683; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 684; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 685; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 686; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 687; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 688; GFX940-GISEL-NEXT: s_endpgm 689; 690; GFX11-SDAG-LABEL: soff4_voff2: 691; GFX11-SDAG: ; %bb.0: ; %bb 692; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 693; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 694; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 695; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 696; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4 697; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 698; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 699; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 700; GFX11-SDAG-NEXT: v_add3_u32 v3, 4, s0, v0 701; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 702; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 
offset:1 dlc 703; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 704; GFX11-SDAG-NEXT: scratch_store_b8 v3, v2, off offset:2 dlc 705; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 706; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc 707; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 708; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 709; GFX11-SDAG-NEXT: s_endpgm 710; 711; GFX11-GISEL-LABEL: soff4_voff2: 712; GFX11-GISEL: ; %bb.0: ; %bb 713; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 714; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 715; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 716; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 717; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 718; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 719; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 720; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 721; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 722; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 723; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 724; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 725; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 726; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 727; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 728; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 729; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 730; GFX11-GISEL-NEXT: s_endpgm 731bb: 732 %soff4 = mul i32 %soff, 4 733 %a = alloca i8, i32 64, align 4, addrspace(5) 734 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4 735 %voff = call i32 @llvm.amdgcn.workitem.id.x() 736 %voff2 = mul i32 %voff, 2 737 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 738 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 739 store volatile i8 1, i8 addrspace(5)* %p1 740 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 741 store volatile i8 2, i8 addrspace(5)* %p2 742 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 743 store volatile i8 4, i8 addrspace(5)* %p4 
ret void
}

; soff4_voff4: the kernel argument %soff is multiplied by 4 and the workitem
; id is multiplied by 4; both products are used as byte offsets into a
; 64-byte, 4-aligned alloca in addrspace(5). Three volatile byte stores are
; then issued at constant offsets 1, 2 and 4 from the resulting address.
; The expected ISA below is autogenerated (see the NOTE at the top of the
; file); regenerate it with the update script rather than editing it by hand.
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff4:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff4:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff4:
; GFX11-SDAG:       ; %bb.0: ; %bb
; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff4:
; GFX11-GISEL:       ; %bb.0: ; %bb
; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT:    s_endpgm
bb:
  ; The block label must stay "bb": it shows up in the checked "%bb" asm comment.
  ; Kernel-argument contribution to the address: %soff * 4 bytes.
  %sbytes = mul i32 %soff, 4
  %buf = alloca i8, i32 64, align 4, addrspace(5)
  %buf.s = getelementptr i8, i8 addrspace(5)* %buf, i32 %sbytes
  ; Per-workitem contribution to the address: workitem id x * 4 bytes.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %vbytes = mul i32 %tid, 4
  %base = getelementptr i8, i8 addrspace(5)* %buf.s, i32 %vbytes
  ; Constant offsets 1, 2 and 4 from the combined base; the stores are volatile.
  %slot1 = getelementptr i8, i8 addrspace(5)* %base, i32 1
  store volatile i8 1, i8 addrspace(5)* %slot1
  %slot2 = getelementptr i8, i8 addrspace(5)* %base, i32 2
  store volatile i8 2, i8 addrspace(5)* %slot2
  %slot4 = getelementptr i8, i8 addrspace(5)* %base, i32 4
  store volatile i8 4, i8 addrspace(5)* %slot4
  ret void
}