; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX1010 %s

; Checks the code emitted at the start of amdgpu_kernel functions on gfx803,
; gfx900 and gfx1010 for four kernel shapes (empty, one stack object, one
; call, stack object plus call), first with plain nounwind attributes (#0)
; and then with "frame-pointer"="all" (#2). A final kernel checks a VGPR
; spill whose scratch offset is materialized in an SGPR.

; Empty kernel: the only instruction checked on every target is s_endpgm.
define amdgpu_kernel void @test_kern_empty() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; Kernel with a single 4-byte private (addrspace(5)) alloca and a volatile
; store to it; the store is checked as a buffer_store_dword at offset:4.
define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; Kernel that only calls @ex; the checks show s32 being set to 0 before the
; s_swappc_b64 that performs the call.
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #0
  ret void
}

; Kernel with both a 4-byte private alloca and a call to @ex; here s32 is
; set to 0x400 on gfx803/gfx900 and 0x200 on gfx1010 instead of 0.
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #0
  ret void
}

; Same shape as test_kern_empty but with "frame-pointer"="all" (#2): the
; checks additionally expect s33 to be set to 0.
define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; Same shape as test_kern_stack but with "frame-pointer"="all" (#2): the
; store uses s33 as its scalar offset operand rather than the literal 0.
define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; Same shape as test_kern_call but with "frame-pointer"="all" (#2): both s32
; and s33 are set to 0 before the call.
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #2
  ret void
}

; Same shape as test_kern_stack_and_call but with "frame-pointer"="all" (#2):
; s33 is set to 0 and used as the scalar offset operand of the store.
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #2
  ret void
}

; Kernel limited to 8 VGPRs (#1) whose inline asm clobbers v0-v7, forcing the
; loaded value to be spilled and reloaded. The spill slot lands past the large
; alloca, so its offset is materialized in an SGPR (s_mov_b32 ..., 0x40000 or
; 0x20000) instead of being encoded in the instruction's offset field.
define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-LABEL: test_sgpr_offset_kernel:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT:    ;;#ASMSTART
; GFX803-NEXT:    ;;#ASMEND
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_sgpr_offset_kernel:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX900-NEXT:    s_mov_b32 s6, 0x40000
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s6, 0x40000
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_sgpr_offset_kernel:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_mov_b32 s6, 0x20000
; GFX1010-NEXT:    ; implicit-def: $vcc_hi
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX1010-NEXT:    v_nop
; GFX1010-NEXT:    s_mov_b32 s6, 0x20000
; GFX1010-NEXT:    ;;#ASMSTART
; GFX1010-NEXT:    ;;#ASMEND
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX1010-NEXT:    s_endpgm
entry:
  ; Fill scratch with a 4092-byte object so that the spill slot of %a sits at
  ; byte offset 4096. That offset does not fit in the instruction, so it has
  ; to live in the SGPR offset operand.
  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; The SGPR offset is the per-lane byte offset scaled by the wave size:
  ;   0x40000 / 64 = 4096 (wave64, the gfx803 and gfx900 checks)
  ;   0x20000 / 32 = 4096 (the gfx1010 checks; presumably wave32 - confirm)
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; External callee used by the *_call kernels above.
declare hidden void @ex() local_unnamed_addr #0

attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
attributes #2 = { nounwind "frame-pointer"="all" }