; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL

; Test flat scratch SVS addressing mode with various combinations of alignment
; of soffset, voffset and inst_offset.
;
; Every kernel below has the same shape: a 64-byte, 4-byte-aligned private
; (addrspace 5) alloca is indexed first by the kernel argument times S, then by
; the workitem id times V, and finally three volatile i8 stores write the
; values 1, 2 and 4 at constant byte offsets 1, 2 and 4. The kernel name
; soffS_voffV spells out the two multipliers; only S and V differ between
; kernels. The four RUN lines cover gfx940 and gfx1100, each with SelectionDAG
; (-global-isel=0) and GlobalISel (-global-isel=1).

; Reads the workitem id in the x dimension; used as the per-lane offset.
declare i32 @llvm.amdgcn.workitem.id.x()

; Kernel argument scaled by 1, workitem id scaled by 1.
define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 1, workitem id scaled by 2.
define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 1, workitem id scaled by 4.
define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 2, workitem id scaled by 1.
define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 2, workitem id scaled by 2.
define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 2, workitem id scaled by 4.
define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 4, workitem id scaled by 1.
; In the gfx1100 SelectionDAG checks the stores at offsets 1 and 2 go through a
; combined VGPR address, while the store at offset 4 keeps separate VGPR and
; SGPR operands. NOTE(review): presumably an alignment-driven choice; confirm
; against the SVS selection code before relying on it.
define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: v_add3_u32 v2, 4, s0, v0
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v2, v1, off offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v2, v3, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 4, workitem id scaled by 2.
; In the gfx1100 SelectionDAG checks only the store at offset 2 goes through a
; combined VGPR address; the stores at offsets 1 and 4 keep separate VGPR and
; SGPR operands. NOTE(review): presumably an alignment-driven choice; confirm
; against the SVS selection code before relying on it.
define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: v_add3_u32 v3, 4, s0, v0
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v3, v2, off offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; Kernel argument scaled by 4, workitem id scaled by 4.
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}