; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL

; Test flat scratch SVS addressing mode with various combinations of alignment
; of soffset, voffset and inst_offset.
;
; Every kernel below has the same shape. A 64-byte, 4-byte-aligned private
; (addrspace(5)) alloca is indexed first by the kernel argument %soff scaled by
; N and then by the workitem id scaled by M; the kernel is named soffN_voffM.
; Three volatile i8 stores (values 1, 2 and 4) are then issued at constant byte
; offsets 1, 2 and 4 from that address.
;
; In the expected output the SelectionDAG runs keep the scalar part in s0 and
; the vector part in v0 and use the "v0, vN, s0 offset" operand form, while the
; GlobalISel runs add both parts into v0 first and use "off" for the scalar
; operand. The expected output is generated by the script named in the first
; line of this file and should be regenerated with it rather than edited by
; hand.

declare i32 @llvm.amdgcn.workitem.id.x()

; soffset = %soff * 1, voffset = workitem.id.x * 1 (neither part is scaled).
define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 1, voffset = workitem.id.x * 2.
define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 1, voffset = workitem.id.x * 4.
define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff1_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff1_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 2, voffset = workitem.id.x * 1.
define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 2, voffset = workitem.id.x * 2.
define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 2, voffset = workitem.id.x * 4.
define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff2_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff2_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 4, voffset = workitem.id.x * 1.
define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff1:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff1:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff1:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff1:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; soffset = %soff * 4, voffset = workitem.id.x * 2.
define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff2:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff2:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff2:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff2:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

define amdgpu_kernel
void @soff4_voff4(i32 %soff) { 717; GFX940-SDAG-LABEL: soff4_voff4: 718; GFX940-SDAG: ; %bb.0: ; %bb 719; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 720; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 721; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 722; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 723; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 724; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 725; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 726; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 727; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 728; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 729; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 730; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 731; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 732; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 733; GFX940-SDAG-NEXT: s_endpgm 734; 735; GFX940-GISEL-LABEL: soff4_voff4: 736; GFX940-GISEL: ; %bb.0: ; %bb 737; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 738; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 739; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 740; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 741; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 742; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 743; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 744; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 745; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 746; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 747; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 748; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 749; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 750; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 751; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 752; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 753; GFX940-GISEL-NEXT: s_endpgm 754; 755; GFX11-SDAG-LABEL: soff4_voff4: 756; GFX11-SDAG: ; %bb.0: ; %bb 757; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 758; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 759; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 760; GFX11-SDAG-NEXT: 
v_mov_b32_e32 v2, 2 761; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 762; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 763; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 764; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 765; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 766; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 767; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc 768; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 769; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc 770; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 771; GFX11-SDAG-NEXT: s_endpgm 772; 773; GFX11-GISEL-LABEL: soff4_voff4: 774; GFX11-GISEL: ; %bb.0: ; %bb 775; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 776; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 777; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 1 778; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 779; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 780; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 781; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 782; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, v1, v0 783; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 2 784; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 785; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 786; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 787; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 788; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 789; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 790; GFX11-GISEL-NEXT: s_endpgm 791bb: 792 %soff4 = mul i32 %soff, 4 793 %a = alloca i8, i32 64, align 4, addrspace(5) 794 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4 795 %voff = call i32 @llvm.amdgcn.workitem.id.x() 796 %voff4 = mul i32 %voff, 4 797 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4 798 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 799 store volatile i8 1, i8 addrspace(5)* %p1 800 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 801 store volatile i8 2, i8 addrspace(5)* %p2 802 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 803 store volatile i8 4, 
i8 addrspace(5)* %p4 804 ret void 805} 806