; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL

; Test flat scratch SVS addressing mode with various combinations of alignment
; of soffset, voffset and inst_offset.
;
; Every kernel below has the same shape: a 64-byte, 4-aligned private alloca is
; indexed first by the scaled kernel argument %soff and then by the scaled
; workitem id, after which three volatile byte stores (values 1, 2 and 4) are
; issued at constant offsets 1, 2 and 4 from the resulting pointer. The kernels
; differ only in the two scale factors, which are encoded in the kernel name as
; soff<scale>_voff<scale>.

declare i32 @llvm.amdgcn.workitem.id.x()

; %soff scaled by 1, workitem id scaled by 1.
define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff1:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff1:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 1, workitem id scaled by 2.
define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff2:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff2:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 1, workitem id scaled by 4.
define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff1_voff4:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff1_voff4:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff1 = mul i32 %soff, 1
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 2, workitem id scaled by 1.
define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff1:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff1:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 2, workitem id scaled by 2.
define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff2:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff2:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 2, workitem id scaled by 4.
define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff2_voff4:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff2_voff4:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff2 = mul i32 %soff, 2
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 4, workitem id scaled by 1.
define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff1:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff1:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff1 = mul i32 %voff, 1
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 4, workitem id scaled by 2.
define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff2:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff2:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff2 = mul i32 %voff, 2
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}

; %soff scaled by 4, workitem id scaled by 4.
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG-LABEL: soff4_voff4:
; GFX940-SDAG:       ; %bb.0: ; %bb
; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX940-SDAG-NEXT:    s_endpgm
;
; GFX940-GISEL-LABEL: soff4_voff4:
; GFX940-GISEL:       ; %bb.0: ; %bb
; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX940-GISEL-NEXT:    s_endpgm
bb:
  %soff4 = mul i32 %soff, 4
  %a = alloca i8, i32 64, align 4, addrspace(5)
  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  %voff = call i32 @llvm.amdgcn.workitem.id.x()
  %voff4 = mul i32 %voff, 4
  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
  store volatile i8 1, i8 addrspace(5)* %p1
  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
  store volatile i8 2, i8 addrspace(5)* %p2
  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
  store volatile i8 4, i8 addrspace(5)* %p4
  ret void
}