; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s

; Verify that we consider the xor at the end of the waterfall loop emitted for
; divergent indirect addressing as a terminator.
;
; The llc invocation above compiles at -O0, stops right after the fast register
; allocator, and the check lines in the function below match the resulting MIR.
; The target is pinned with -mtriple instead of -march: this module carries no
; target triple, and -march would only swap the architecture into the host's
; default triple, leaving the remaining triple components host-dependent.

declare i32 @llvm.amdgcn.workitem.id.x() #1

; There should be no spill code inserted between the xor and the real terminator.
; In bb.1 of the expected output every SI_SPILL_* save comes before
; "$exec = S_XOR_B64_term", and that instruction is immediately followed by
; S_CBRANCH_EXECNZ.
define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
  ; GCN-LABEL: name: extract_w_offset_vgpr
  ; GCN: bb.0.entry:
  ; GCN-NEXT:   successors: %bb.1(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0, $sgpr0_sgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
  ; GCN-NEXT:   renamable $sgpr6 = COPY renamable $sgpr1
  ; GCN-NEXT:   renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
  ; GCN-NEXT:   renamable $sgpr4 = S_MOV_B32 61440
  ; GCN-NEXT:   renamable $sgpr5 = S_MOV_B32 -1
  ; GCN-NEXT:   undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
  ; GCN-NEXT:   renamable $sgpr1 = COPY killed renamable $sgpr6
  ; GCN-NEXT:   renamable $sgpr2 = COPY killed renamable $sgpr5
  ; GCN-NEXT:   renamable $sgpr3 = COPY killed renamable $sgpr4
  ; GCN-NEXT:   SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr0 = S_MOV_B32 16
  ; GCN-NEXT:   renamable $sgpr1 = S_MOV_B32 15
  ; GCN-NEXT:   renamable $sgpr2 = S_MOV_B32 14
  ; GCN-NEXT:   renamable $sgpr3 = S_MOV_B32 13
  ; GCN-NEXT:   renamable $sgpr4 = S_MOV_B32 12
  ; GCN-NEXT:   renamable $sgpr5 = S_MOV_B32 11
  ; GCN-NEXT:   renamable $sgpr6 = S_MOV_B32 10
  ; GCN-NEXT:   renamable $sgpr7 = S_MOV_B32 9
  ; GCN-NEXT:   renamable $sgpr8 = S_MOV_B32 8
  ; GCN-NEXT:   renamable $sgpr9 = S_MOV_B32 7
  ; GCN-NEXT:   renamable $sgpr10 = S_MOV_B32 6
  ; GCN-NEXT:   renamable $sgpr11 = S_MOV_B32 5
  ; GCN-NEXT:   renamable $sgpr12 = S_MOV_B32 3
  ; GCN-NEXT:   renamable $sgpr13 = S_MOV_B32 2
  ; GCN-NEXT:   renamable $sgpr14 = S_MOV_B32 1
  ; GCN-NEXT:   renamable $sgpr15 = S_MOV_B32 0
  ; GCN-NEXT:   renamable $vgpr0 = COPY killed renamable $sgpr15
  ; GCN-NEXT:   renamable $vgpr30 = COPY killed renamable $sgpr14
  ; GCN-NEXT:   renamable $vgpr29 = COPY killed renamable $sgpr13
  ; GCN-NEXT:   renamable $vgpr28 = COPY killed renamable $sgpr12
  ; GCN-NEXT:   renamable $vgpr27 = COPY killed renamable $sgpr11
  ; GCN-NEXT:   renamable $vgpr26 = COPY killed renamable $sgpr10
  ; GCN-NEXT:   renamable $vgpr25 = COPY killed renamable $sgpr9
  ; GCN-NEXT:   renamable $vgpr24 = COPY killed renamable $sgpr8
  ; GCN-NEXT:   renamable $vgpr23 = COPY killed renamable $sgpr7
  ; GCN-NEXT:   renamable $vgpr22 = COPY killed renamable $sgpr6
  ; GCN-NEXT:   renamable $vgpr21 = COPY killed renamable $sgpr5
  ; GCN-NEXT:   renamable $vgpr20 = COPY killed renamable $sgpr4
  ; GCN-NEXT:   renamable $vgpr19 = COPY killed renamable $sgpr3
  ; GCN-NEXT:   renamable $vgpr18 = COPY killed renamable $sgpr2
  ; GCN-NEXT:   renamable $vgpr17 = COPY killed renamable $sgpr1
  ; GCN-NEXT:   renamable $vgpr16 = COPY killed renamable $sgpr0
  ; GCN-NEXT:   undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
  ; GCN-NEXT:   renamable $vgpr1 = COPY killed renamable $vgpr30
  ; GCN-NEXT:   renamable $vgpr2 = COPY killed renamable $vgpr29
  ; GCN-NEXT:   renamable $vgpr3 = COPY killed renamable $vgpr28
  ; GCN-NEXT:   renamable $vgpr4 = COPY killed renamable $vgpr27
  ; GCN-NEXT:   renamable $vgpr5 = COPY killed renamable $vgpr26
  ; GCN-NEXT:   renamable $vgpr6 = COPY killed renamable $vgpr25
  ; GCN-NEXT:   renamable $vgpr7 = COPY killed renamable $vgpr24
  ; GCN-NEXT:   renamable $vgpr8 = COPY killed renamable $vgpr23
  ; GCN-NEXT:   renamable $vgpr9 = COPY killed renamable $vgpr22
  ; GCN-NEXT:   renamable $vgpr10 = COPY killed renamable $vgpr21
  ; GCN-NEXT:   renamable $vgpr11 = COPY killed renamable $vgpr20
  ; GCN-NEXT:   renamable $vgpr12 = COPY killed renamable $vgpr19
  ; GCN-NEXT:   renamable $vgpr13 = COPY killed renamable $vgpr18
  ; GCN-NEXT:   renamable $vgpr14 = COPY killed renamable $vgpr17
  ; GCN-NEXT:   renamable $vgpr15 = COPY killed renamable $vgpr16
  ; GCN-NEXT:   SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
  ; GCN-NEXT:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
  ; GCN-NEXT:   renamable $vgpr0 = IMPLICIT_DEF
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
  ; GCN-NEXT:   $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
  ; GCN-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
  ; GCN-NEXT:   $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
  ; GCN-NEXT:   renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
  ; GCN-NEXT:   SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
  ; GCN-NEXT:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
  ; GCN-NEXT:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
  ; GCN-NEXT:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
  ; GCN-NEXT:   $exec = S_MOV_B64 renamable $sgpr0_sgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5)
  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1)
  ; GCN-NEXT:   S_ENDPGM 0
entry:
  ; The element index is computed from the workitem id, so it can differ from
  ; lane to lane; this is what makes the extractelement below a divergent
  ; indirect access.
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, 1
  ; The constant vector holds 0-3 followed by 5-16 (there is no 4). The
  ; S_MOV_B32 immediates in the expectations above mirror these values, so the
  ; two must be kept in sync.
  %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}