# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# Embedded LLVM IR module. Every MIR function further down has an empty
# kernel stub of the same name here.
--- |
  ; Only this kernel declares arguments; the MIR memory operands refer to
  ; them as %ir.global4, %ir.global16, %ir.flat4 and %ir.flat16.
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4, <4 x i32> addrspace(1)* %global16, i32* %flat4, <4 x i32>* %flat16) {
    ret void
  }

  ; The remaining kernels are argument-less placeholders.
  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @bundle_no_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @insert_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @exit_bundle() {
    ret void
  }

  define amdgpu_kernel void @cross_bundle() {
    ret void
  }

...
---

# FLAT loads into AGPRs across three blocks, with memory operands that point
# at global memory (bb.0), are partly absent (bb.1), or are flat (bb.2).
#
# Expected waits before an AGPR with an outstanding load is touched again:
#   bb.0: global loads return in order, so s_waitcnt vmcnt(1) is enough
#         before $agpr0 is redefined.
#   bb.1: s_waitcnt vmcnt(0)
#   bb.2: s_waitcnt vmcnt(0)
#
# Reading the immediates with the gfx9 s_waitcnt layout (vmcnt in bits 3:0
# and 15:14, expcnt in 6:4, lgkmcnt in 11:8):
#   0     -> vmcnt(0) expcnt(0) lgkmcnt(0)
#   3953  -> vmcnt(1)
#   3952  -> vmcnt(0)
#   49279 -> lgkmcnt(0)

name: flat_zero_waitcnt

body: |
  ; GCN-LABEL: name: flat_zero_waitcnt
  ; GCN: bb.0:
  ; GCN:   successors: %bb.1(0x80000000)
  ; GCN:   S_WAITCNT 0
  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
  ; GCN:   S_WAITCNT 3953
  ; GCN:   $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
  ; GCN:   S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN:   successors: %bb.2(0x80000000)
  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN:   S_WAITCNT 3952
  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
  ; GCN:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
  ; GCN:   S_BRANCH %bb.2
  ; GCN: bb.2:
  ; GCN:   S_WAITCNT 49279
  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
  ; GCN:   S_WAITCNT 3952
  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
  ; GCN:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
  ; GCN:   S_ENDPGM 0
  bb.0:
    successors: %bb.1
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    S_ENDPGM 0
...
---
# bb.0 has exactly one successor and reaches it by fallthrough, so no wait
# is placed at the end of bb.0. The wait (112, i.e. vmcnt(0) lgkmcnt(0) in
# the gfx9 layout) appears in bb.1, directly before the store that reads
# the loaded $agpr0.


name: single_fallthrough_successor_no_end_block_wait

body: |
  ; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
  ; GCN: bb.0:
  ; GCN:   successors: %bb.1(0x80000000)
  ; GCN:   S_WAITCNT 0
  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN: bb.1:
  ; GCN:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
  ; GCN:   S_WAITCNT 112
  ; GCN:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN:   S_ENDPGM 0
  bb.0:
    successors: %bb.1
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.




name: single_branch_successor_not_next_block

body: |
  ; bb.0 branches over bb.1 straight to bb.2. bb.1 has no predecessor in
  ; this function and is expected to receive no wait of its own; the wait
  ; for the load issued in bb.0 is expected in bb.2, right before the store
  ; that consumes $agpr0.
  ; GCN-LABEL: name: single_branch_successor_not_next_block
  ; GCN: bb.0:
  ; GCN:   successors: %bb.2(0x80000000)
  ; GCN:   S_WAITCNT 0
  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN:   S_BRANCH %bb.2
  ; GCN: bb.1:
  ; GCN:   FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN:   S_ENDPGM 0
  ; GCN: bb.2:
  ; GCN:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
  ; GCN:   S_WAITCNT 112
  ; GCN:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN:   S_ENDPGM 0
  bb.0:
    successors: %bb.2
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0

  bb.2:
    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

---
# The input already carries a full wait between the load and the store that
# reads its result, so the pass must keep that one and add no second wait.
# These checks are hand-written (note the -NEXT / -NOT forms) rather than
# produced by the update script.
# GCN-LABEL: name: preexisting_waitcnt{{$}}
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: S_WAITCNT 0
# GCN-NOT: S_WAITCNT
name: preexisting_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; The address pair read below is $vgpr2_vgpr3, so that is what must be
    ; live on entry.
    liveins: $vgpr2_vgpr3
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 0
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# A bundle that contains no wait sits between the load and the store that
# uses the loaded $agpr0. The pass is expected to leave the bundle alone and
# place the wait (112: vmcnt(0) lgkmcnt(0) in the gfx9 layout) after it,
# directly before the store.
name: bundle_no_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; Live-in matches the address pair actually read by the load and store.
    liveins: $vgpr2_vgpr3
    ; GCN-LABEL: name: bundle_no_waitcnt
    ; GCN: liveins: $vgpr2_vgpr3
    ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: BUNDLE {
    ; GCN:   S_NOP 0
    ; GCN:   S_NOP 0
    ; GCN: }
    ; GCN: S_WAITCNT 112
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_NOP 0
    }
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# See the waitcnt inside the bundle and don't insert an extra one: the
# expected output is the input unchanged.
name: preexisting_waitcnt_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; Live-in matches the address pair actually read by the load and store.
    liveins: $vgpr2_vgpr3
    ; GCN-LABEL: name: preexisting_waitcnt_in_bundle
    ; GCN: liveins: $vgpr2_vgpr3
    ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: BUNDLE {
    ; GCN:   S_NOP 0
    ; GCN:   S_WAITCNT 0
    ; GCN: }
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_WAITCNT 0
    }
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def and use inside bundle: the load and the store that consumes its
# result are both members of one bundle, so the wait is expected between
# them, inside the bundle.

name: insert_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; Live-in matches the pair listed as the bundle's implicit use.
    liveins: $vgpr2_vgpr3
    ; GCN-LABEL: name: insert_in_bundle
    ; GCN: liveins: $vgpr2_vgpr3
    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN:   S_WAITCNT 112
    ; GCN:   FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: }
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
      FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    }
...

---

# Def is last instruction in bundle, use is outside bundle: the wait is
# expected after the closing brace, ahead of the store.


name: exit_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; Live-in matches the pair listed as the bundle's implicit use.
    liveins: $vgpr2_vgpr3
    ; GCN-LABEL: name: exit_bundle
    ; GCN: liveins: $vgpr2_vgpr3
    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: }
    ; GCN: S_WAITCNT 112
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    }

    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def is in bundle, use is in another bundle: the wait is expected between
# the two bundles, not inside either of them.


name: cross_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; Live-in matches the pair listed as an implicit use on both bundles.
    liveins: $vgpr2_vgpr3
    ; GCN-LABEL: name: cross_bundle
    ; GCN: liveins: $vgpr2_vgpr3
    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: }
    ; GCN: S_WAITCNT 112
    ; GCN: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN:   FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN: }
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    }
    BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
      FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    }
...
