1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG 3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL 4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG 5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL 6 7; Test flat scratch SVS addressing mode with various combinations of alignment 8; of soffset, voffset and inst_offset. 9 10declare i32 @llvm.amdgcn.workitem.id.x() 11 12define amdgpu_kernel void @soff1_voff1(i32 %soff) { 13; GFX940-SDAG-LABEL: soff1_voff1: 14; GFX940-SDAG: ; %bb.0: ; %bb 15; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 16; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 17; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 18; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 19; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 20; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 21; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 22; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 23; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 24; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 25; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 26; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 27; GFX940-SDAG-NEXT: s_endpgm 28; 29; GFX940-GISEL-LABEL: soff1_voff1: 30; GFX940-GISEL: ; %bb.0: ; %bb 31; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 32; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 33; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 34; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 35; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 36; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 37; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 38; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 39; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 40; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 41; GFX940-GISEL-NEXT: scratch_store_byte 
v0, v3, off offset:2 sc0 sc1 42; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 43; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 44; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 45; GFX940-GISEL-NEXT: s_endpgm 46; 47; GFX11-SDAG-LABEL: soff1_voff1: 48; GFX11-SDAG: ; %bb.0: ; %bb 49; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 50; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 51; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 52; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 53; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 54; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 55; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 56; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 57; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 58; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 59; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 60; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 61; GFX11-SDAG-NEXT: s_endpgm 62; 63; GFX11-GISEL-LABEL: soff1_voff1: 64; GFX11-GISEL: ; %bb.0: ; %bb 65; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 66; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 67; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 68; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 69; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 70; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 71; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 72; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 73; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 74; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 75; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 76; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 77; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 78; GFX11-GISEL-NEXT: s_endpgm 79bb: 80 %soff1 = mul i32 %soff, 1 81 %a = alloca i8, i32 64, align 4, addrspace(5) 82 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 83 %voff = call i32 @llvm.amdgcn.workitem.id.x() 84 %voff1 = mul i32 
%voff, 1 85 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 86 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 87 store volatile i8 1, i8 addrspace(5)* %p1 88 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 89 store volatile i8 2, i8 addrspace(5)* %p2 90 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 91 store volatile i8 4, i8 addrspace(5)* %p4 92 ret void 93} 94 95define amdgpu_kernel void @soff1_voff2(i32 %soff) { 96; GFX940-SDAG-LABEL: soff1_voff2: 97; GFX940-SDAG: ; %bb.0: ; %bb 98; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 99; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 100; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 101; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 102; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 103; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 104; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 105; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 106; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 107; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 108; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 109; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 110; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 111; GFX940-SDAG-NEXT: s_endpgm 112; 113; GFX940-GISEL-LABEL: soff1_voff2: 114; GFX940-GISEL: ; %bb.0: ; %bb 115; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 116; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 117; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 118; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 119; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 120; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 121; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 122; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 123; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 124; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 125; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 126; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 127; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 128; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 
sc1 129; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 130; GFX940-GISEL-NEXT: s_endpgm 131; 132; GFX11-SDAG-LABEL: soff1_voff2: 133; GFX11-SDAG: ; %bb.0: ; %bb 134; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 135; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 136; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 137; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 138; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 139; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 140; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 141; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 142; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 143; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 144; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 145; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 146; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 147; GFX11-SDAG-NEXT: s_endpgm 148; 149; GFX11-GISEL-LABEL: soff1_voff2: 150; GFX11-GISEL: ; %bb.0: ; %bb 151; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 152; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 153; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 154; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 155; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 156; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 157; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 158; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 159; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 160; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 161; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 162; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 163; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 164; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 165; GFX11-GISEL-NEXT: s_endpgm 166bb: 167 %soff1 = mul i32 %soff, 1 168 %a = alloca i8, i32 64, align 4, addrspace(5) 169 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 170 %voff = call i32 
@llvm.amdgcn.workitem.id.x() 171 %voff2 = mul i32 %voff, 2 172 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 173 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 174 store volatile i8 1, i8 addrspace(5)* %p1 175 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 176 store volatile i8 2, i8 addrspace(5)* %p2 177 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 178 store volatile i8 4, i8 addrspace(5)* %p4 179 ret void 180} 181 182define amdgpu_kernel void @soff1_voff4(i32 %soff) { 183; GFX940-SDAG-LABEL: soff1_voff4: 184; GFX940-SDAG: ; %bb.0: ; %bb 185; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 186; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 187; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 188; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 189; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 190; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 191; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 192; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 193; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 194; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 195; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 196; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 197; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 198; GFX940-SDAG-NEXT: s_endpgm 199; 200; GFX940-GISEL-LABEL: soff1_voff4: 201; GFX940-GISEL: ; %bb.0: ; %bb 202; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 203; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 204; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 205; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 206; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 207; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 208; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 209; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 210; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 211; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 212; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 213; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 214; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 
215; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 216; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 217; GFX940-GISEL-NEXT: s_endpgm 218; 219; GFX11-SDAG-LABEL: soff1_voff4: 220; GFX11-SDAG: ; %bb.0: ; %bb 221; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 222; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0 223; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 224; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 225; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 226; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 227; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 228; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc 229; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 230; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc 231; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 232; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 233; GFX11-SDAG-NEXT: s_endpgm 234; 235; GFX11-GISEL-LABEL: soff1_voff4: 236; GFX11-GISEL: ; %bb.0: ; %bb 237; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 238; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 239; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 240; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 241; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 242; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 243; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 244; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 245; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 246; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 247; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 248; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 249; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 250; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 251; GFX11-GISEL-NEXT: s_endpgm 252bb: 253 %soff1 = mul i32 %soff, 1 254 %a = alloca i8, i32 64, align 4, addrspace(5) 255 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1 256 %voff = call i32 
@llvm.amdgcn.workitem.id.x() 257 %voff4 = mul i32 %voff, 4 258 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4 259 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 260 store volatile i8 1, i8 addrspace(5)* %p1 261 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 262 store volatile i8 2, i8 addrspace(5)* %p2 263 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 264 store volatile i8 4, i8 addrspace(5)* %p4 265 ret void 266} 267 268define amdgpu_kernel void @soff2_voff1(i32 %soff) { 269; GFX940-SDAG-LABEL: soff2_voff1: 270; GFX940-SDAG: ; %bb.0: ; %bb 271; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 272; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 273; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 274; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 275; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 276; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 277; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 278; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 279; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 280; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 281; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 282; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 283; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 284; GFX940-SDAG-NEXT: s_endpgm 285; 286; GFX940-GISEL-LABEL: soff2_voff1: 287; GFX940-GISEL: ; %bb.0: ; %bb 288; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 289; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 290; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 291; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 292; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 293; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 294; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 295; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 296; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 297; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 298; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 299; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1 300; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 301; 
GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 302; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 303; GFX940-GISEL-NEXT: s_endpgm 304; 305; GFX11-SDAG-LABEL: soff2_voff1: 306; GFX11-SDAG: ; %bb.0: ; %bb 307; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 308; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 309; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 4 310; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 311; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 312; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 313; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 314; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 315; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 316; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 317; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 318; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 319; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 320; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 321; GFX11-SDAG-NEXT: s_endpgm 322; 323; GFX11-GISEL-LABEL: soff2_voff1: 324; GFX11-GISEL: ; %bb.0: ; %bb 325; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 326; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 327; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 328; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 329; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 330; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 331; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 332; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 333; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 334; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 335; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 336; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 337; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 338; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 339; GFX11-GISEL-NEXT: s_endpgm 340bb: 341 %soff2 = mul i32 %soff, 2 342 %a = alloca i8, i32 64, align 
4, addrspace(5) 343 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 344 %voff = call i32 @llvm.amdgcn.workitem.id.x() 345 %voff1 = mul i32 %voff, 1 346 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 347 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 348 store volatile i8 1, i8 addrspace(5)* %p1 349 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 350 store volatile i8 2, i8 addrspace(5)* %p2 351 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 352 store volatile i8 4, i8 addrspace(5)* %p4 353 ret void 354} 355 356define amdgpu_kernel void @soff2_voff2(i32 %soff) { 357; GFX940-SDAG-LABEL: soff2_voff2: 358; GFX940-SDAG: ; %bb.0: ; %bb 359; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 360; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 361; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 362; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 363; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 364; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 365; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 366; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 367; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 368; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 369; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 370; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 371; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 372; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 373; GFX940-SDAG-NEXT: s_endpgm 374; 375; GFX940-GISEL-LABEL: soff2_voff2: 376; GFX940-GISEL: ; %bb.0: ; %bb 377; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 378; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 379; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 380; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 381; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 382; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 383; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 384; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 385; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 386; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 387; 
GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 388; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 389; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 390; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 391; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 392; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 393; GFX940-GISEL-NEXT: s_endpgm 394; 395; GFX11-SDAG-LABEL: soff2_voff2: 396; GFX11-SDAG: ; %bb.0: ; %bb 397; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 398; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 399; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 400; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 401; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 402; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 403; GFX11-SDAG-NEXT: v_add3_u32 v0, 4, s0, v0 404; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:1 dlc 405; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 406; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, off offset:2 dlc 407; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 408; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 409; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 410; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 411; GFX11-SDAG-NEXT: s_endpgm 412; 413; GFX11-GISEL-LABEL: soff2_voff2: 414; GFX11-GISEL: ; %bb.0: ; %bb 415; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 416; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 417; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 418; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 419; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 420; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 421; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 422; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 423; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 424; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 425; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 426; 
GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 427; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 428; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 429; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 430; GFX11-GISEL-NEXT: s_endpgm 431bb: 432 %soff2 = mul i32 %soff, 2 433 %a = alloca i8, i32 64, align 4, addrspace(5) 434 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 435 %voff = call i32 @llvm.amdgcn.workitem.id.x() 436 %voff2 = mul i32 %voff, 2 437 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 438 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 439 store volatile i8 1, i8 addrspace(5)* %p1 440 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 441 store volatile i8 2, i8 addrspace(5)* %p2 442 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 443 store volatile i8 4, i8 addrspace(5)* %p4 444 ret void 445} 446 447define amdgpu_kernel void @soff2_voff4(i32 %soff) { 448; GFX940-SDAG-LABEL: soff2_voff4: 449; GFX940-SDAG: ; %bb.0: ; %bb 450; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 451; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 452; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 453; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 454; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 455; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 456; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 457; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 458; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 459; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 460; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 461; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 462; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 463; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 464; GFX940-SDAG-NEXT: s_endpgm 465; 466; GFX940-GISEL-LABEL: soff2_voff4: 467; GFX940-GISEL: ; %bb.0: ; %bb 468; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 469; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 470; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 471; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 
472; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 473; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 474; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 475; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 476; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 477; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 478; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 479; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 480; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 481; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 482; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 483; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 484; GFX940-GISEL-NEXT: s_endpgm 485; 486; GFX11-SDAG-LABEL: soff2_voff4: 487; GFX11-SDAG: ; %bb.0: ; %bb 488; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 489; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 490; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 491; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 492; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 1 493; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 494; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 495; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 496; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 497; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc 498; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 499; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc 500; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 501; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 502; GFX11-SDAG-NEXT: s_endpgm 503; 504; GFX11-GISEL-LABEL: soff2_voff4: 505; GFX11-GISEL: ; %bb.0: ; %bb 506; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 507; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 508; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 509; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 510; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 511; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 512; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, 
s0, 4 513; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 514; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 515; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 516; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 517; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 518; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 519; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 520; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 521; GFX11-GISEL-NEXT: s_endpgm 522bb: 523 %soff2 = mul i32 %soff, 2 524 %a = alloca i8, i32 64, align 4, addrspace(5) 525 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2 526 %voff = call i32 @llvm.amdgcn.workitem.id.x() 527 %voff4 = mul i32 %voff, 4 528 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4 529 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 530 store volatile i8 1, i8 addrspace(5)* %p1 531 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 532 store volatile i8 2, i8 addrspace(5)* %p2 533 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 534 store volatile i8 4, i8 addrspace(5)* %p4 535 ret void 536} 537 538define amdgpu_kernel void @soff4_voff1(i32 %soff) { 539; GFX940-SDAG-LABEL: soff4_voff1: 540; GFX940-SDAG: ; %bb.0: ; %bb 541; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 542; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 543; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 544; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 545; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 546; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 547; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 548; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 549; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 550; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 551; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 552; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 553; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 554; GFX940-SDAG-NEXT: s_endpgm 555; 556; GFX940-GISEL-LABEL: soff4_voff1: 557; 
GFX940-GISEL: ; %bb.0: ; %bb 558; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 559; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 560; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 561; GFX940-GISEL-NEXT: v_mov_b32_e32 v3, 2 562; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 563; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 564; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 565; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 566; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 567; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 568; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 569; GFX940-GISEL-NEXT: scratch_store_byte v0, v3, off offset:2 sc0 sc1 570; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 571; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 572; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 573; GFX940-GISEL-NEXT: s_endpgm 574; 575; GFX11-SDAG-LABEL: soff4_voff1: 576; GFX11-SDAG: ; %bb.0: ; %bb 577; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 578; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v4, 4 579; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 2 580; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 581; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 582; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 583; GFX11-SDAG-NEXT: v_add3_u32 v2, 4, s0, v0 584; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 585; GFX11-SDAG-NEXT: scratch_store_b8 v2, v1, off offset:1 dlc 586; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 587; GFX11-SDAG-NEXT: scratch_store_b8 v2, v3, off offset:2 dlc 588; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 589; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc 590; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 591; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 592; GFX11-SDAG-NEXT: s_endpgm 593; 594; GFX11-GISEL-LABEL: soff4_voff1: 595; GFX11-GISEL: ; %bb.0: ; %bb 596; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 597; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 598; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 599; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 600; 
GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 601; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 602; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 603; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 604; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 605; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 606; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 607; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 608; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 609; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 610; GFX11-GISEL-NEXT: s_endpgm 611bb: 612 %soff4 = mul i32 %soff, 4 613 %a = alloca i8, i32 64, align 4, addrspace(5) 614 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4 615 %voff = call i32 @llvm.amdgcn.workitem.id.x() 616 %voff1 = mul i32 %voff, 1 617 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1 618 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 619 store volatile i8 1, i8 addrspace(5)* %p1 620 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 621 store volatile i8 2, i8 addrspace(5)* %p2 622 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 623 store volatile i8 4, i8 addrspace(5)* %p4 624 ret void 625} 626 627define amdgpu_kernel void @soff4_voff2(i32 %soff) { 628; GFX940-SDAG-LABEL: soff4_voff2: 629; GFX940-SDAG: ; %bb.0: ; %bb 630; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24 631; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 632; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 633; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 634; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) 635; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 636; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4 637; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1 638; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 639; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1 640; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 641; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 642; 
GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1 643; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) 644; GFX940-SDAG-NEXT: s_endpgm 645; 646; GFX940-GISEL-LABEL: soff4_voff2: 647; GFX940-GISEL: ; %bb.0: ; %bb 648; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24 649; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 650; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 651; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1 652; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) 653; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 654; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1 655; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0 656; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 657; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1 658; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 659; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1 660; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 661; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 662; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1 663; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) 664; GFX940-GISEL-NEXT: s_endpgm 665; 666; GFX11-SDAG-LABEL: soff4_voff2: 667; GFX11-SDAG: ; %bb.0: ; %bb 668; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24 669; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0 670; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 2 671; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 4 672; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 673; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 674; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 675; GFX11-SDAG-NEXT: v_add3_u32 v3, 4, s0, v0 676; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4 677; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc 678; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 679; GFX11-SDAG-NEXT: scratch_store_b8 v3, v2, off offset:2 dlc 680; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 681; GFX11-SDAG-NEXT: scratch_store_b8 v0, v4, s0 offset:4 dlc 682; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 683; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 684; GFX11-SDAG-NEXT: 
s_endpgm 685; 686; GFX11-GISEL-LABEL: soff4_voff2: 687; GFX11-GISEL: ; %bb.0: ; %bb 688; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24 689; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 690; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4 691; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 692; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 693; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 694; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4 695; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0 696; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc 697; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 698; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc 699; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 700; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc 701; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 702; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 703; GFX11-GISEL-NEXT: s_endpgm 704bb: 705 %soff4 = mul i32 %soff, 4 706 %a = alloca i8, i32 64, align 4, addrspace(5) 707 %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4 708 %voff = call i32 @llvm.amdgcn.workitem.id.x() 709 %voff2 = mul i32 %voff, 2 710 %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2 711 %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1 712 store volatile i8 1, i8 addrspace(5)* %p1 713 %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2 714 store volatile i8 2, i8 addrspace(5)* %p2 715 %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4 716 store volatile i8 4, i8 addrspace(5)* %p4 717 ret void 718} 719

; Kernel soff4_voff4 -- the scalar argument %soff and the workitem id are each
; multiplied by 4 and then added, in that order, to the address of a 64-byte,
; 4-byte-aligned addrspace(5) alloca. Three volatile i8 stores then write the
; values 1, 2 and 4 at byte offsets 1, 2 and 4 from the resulting address.
;
; The per-configuration expectations inside the function were autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of this file);
; regenerate them with that script rather than editing them by hand.
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff4:
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT: s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff4:
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT: v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT: scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: soff4_voff4:
; GFX11-SDAG: ; %bb.0: ; %bb
; GFX11-SDAG-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 4
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, s0 offset:4 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: soff4_voff4:
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e64 v1, s0, 4
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
bb:
  ; Scalar contribution to the address: %soff * 4.
  %s.scaled = mul i32 %soff, 4
  %buf = alloca i8, i32 64, align 4, addrspace(5)
  %buf.s = getelementptr i8, i8 addrspace(5)* %buf, i32 %s.scaled

  ; Per-workitem contribution to the address: workitem.id.x * 4.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %v.scaled = mul i32 %tid, 4
  %base = getelementptr i8, i8 addrspace(5)* %buf.s, i32 %v.scaled

  ; Volatile byte stores at constant offsets 1, 2 and 4 from %base; the stored
  ; value equals the offset in each case.
  %slot1 = getelementptr i8, i8 addrspace(5)* %base, i32 1
  store volatile i8 1, i8 addrspace(5)* %slot1
  %slot2 = getelementptr i8, i8 addrspace(5)* %base, i32 2
  store volatile i8 2, i8 addrspace(5)* %slot2
  %slot4 = getelementptr i8, i8 addrspace(5)* %base, i32 4
  store volatile i8 4, i8 addrspace(5)* %slot4
  ret void
}