; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX1010 %s

; Kernel prologue checks for three subtargets (gfx803, gfx900, gfx1010).
; The kernels below cover the combinations {no stack, stack object} x
; {no call, call to @ex}, first with default attributes (#0) and then with
; "frame-pointer"="all" (#2). A final kernel checks a VGPR spill whose scratch
; offset is too large for the instruction's immediate offset field.
;
; Only the GFX803, GFX900 and GFX1010 prefixes are enabled by the RUN lines;
; any other prefix in this file would be silently ignored by FileCheck.

; No stack objects and no calls: the expected code is just s_endpgm.
define amdgpu_kernel void @test_kern_empty() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; One private (addrspace(5)) stack object written with a volatile store, no
; calls. The scratch resource descriptor s[0:3] is adjusted and the store uses
; an immediate offset with a zero soffset.
define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; A call to the external function @ex and no stack objects. The checks expect
; flat_scratch to be initialized and s32 to be set to 0 before the call.
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_i32 s10, s10, s15
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s15
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s15
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s10, s10, s15
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; GFX1010-NEXT:    s_add_u32 s0, s0, s15
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #0
  ret void
}

; A stack object plus a call to @ex. Compared with test_kern_call, s32 is
; expected to start past the kernel's own stack object (0x400 on gfx803/gfx900,
; 0x200 on gfx1010) instead of at 0.
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_i32 s10, s10, s15
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s15
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s15
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s10, s10, s15
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s15
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #0
  ret void
}

; Same as test_kern_empty but with "frame-pointer"="all" (#2): the only extra
; expected instruction is the initialization of s33 to 0.
define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; Same as test_kern_stack but with "frame-pointer"="all" (#2): s33 is zeroed
; and the stack store uses s33 as its soffset instead of the literal 0.
define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; Same as test_kern_call but with "frame-pointer"="all" (#2): both s32 and s33
; are expected to be set to 0 before the call.
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_i32 s10, s10, s15
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s15
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s15
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s10, s10, s15
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; GFX1010-NEXT:    s_add_u32 s0, s0, s15
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #2
  ret void
}

; Same as test_kern_stack_and_call but with "frame-pointer"="all" (#2): s33 is
; zeroed and used as the soffset of the stack store, and s32 is set to
; 0x400 (gfx803/gfx900) or 0x200 (gfx1010) before the call.
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_i32 s10, s10, s15
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s15
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s15
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s10, s10, s15
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s15
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #2
  ret void
}

; Spill of a VGPR to a scratch slot whose offset does not fit in the buffer
; instruction's immediate offset field. The function is limited to 8 VGPRs
; (#1) and the inline asm clobbers v0-v7, so %a must be spilled across it.
define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-LABEL: test_sgpr_offset_kernel:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT:    ;;#ASMSTART
; GFX803-NEXT:    ;;#ASMEND
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_sgpr_offset_kernel:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_mov_b32 s4, 0x40000
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x40000
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_sgpr_offset_kernel:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_mov_b32 s4, 0x20000
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
; GFX1010-NEXT:    s_mov_b32 s4, 0x20000
; GFX1010-NEXT:    ;;#ASMSTART
; GFX1010-NEXT:    ;;#ASMEND
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX1010-NEXT:    s_endpgm
entry:
  ; Fill scratch up to byte offset 4096: the 4092-byte alloca starts at offset
  ; 4 (element 1 of %buf is accessed at offset:8 in the checks above). The
  ; spill slot for %a therefore lands at an offset that does not fit in the
  ; instruction's immediate field and has to be passed in the SGPR offset.
  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; 0x40000 / 64 = 4096 (for wave64)
  ; 0x20000 / 32 = 4096 (for wave32, the gfx1010 default)
  ; The scaled offset is expected to be materialized with s_mov_b32 into s4 and
  ; used as the soffset operand of both the folded spill and the folded reload.
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

; External callee used by the *_call kernels.
declare hidden void @ex() local_unnamed_addr #0

; #0: baseline. #1: restrict the function to 8 VGPRs. #2: request a frame
; pointer in all functions.
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
attributes #2 = { nounwind "frame-pointer"="all" }