# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# This test runs only the si-insert-waitcnts pass for gfx90a and checks where
# S_WAITCNT instructions end up around FLAT loads and stores whose data
# registers are AGPRs.

--- |
  ; IR stubs: one empty kernel per MIR function in this file. Only
  ; @flat_zero_waitcnt declares arguments; its MIR memory operands refer to
  ; them as %ir.global4, %ir.global16, %ir.flat4 and %ir.flat16.
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                               <4 x i32> addrspace(1)* %global16,
                                               i32* %flat4,
                                               <4 x i32>* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @bundle_no_waitcnt() {
    ret void
  }

  define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @insert_in_bundle() {
    ret void
  }

  define amdgpu_kernel void @exit_bundle() {
    ret void
  }

  define amdgpu_kernel void @cross_bundle() {
    ret void
  }

  define amdgpu_kernel void @high_register_collision() {
    ret void
  }

...
---

# flat_zero_waitcnt: three basic blocks, each issuing a FLAT_LOAD_DWORD into
# $agpr0 followed by a FLAT_LOAD_DWORDX4 into $agpr4_agpr5_agpr6_agpr7, then an
# instruction that touches $agpr0/$agpr1.
#   bb.0: both loads reference the addrspace(1) IR arguments.
#   bb.1: the first load carries no memory operand at all.
#   bb.2: both loads reference the flat-pointer IR arguments.
#
# Global loads will return in order so we should:
#   bb.0: s_waitcnt vmcnt(1)
#   bb.1: s_waitcnt vmcnt(0)
#   bb.2: s_waitcnt vmcnt(0)
# NOTE(review): the per-block mapping above is inferred; presumably these are
# the S_WAITCNT 3953 / 3952 / 3952 checks below -- confirm against the gfx9
# s_waitcnt immediate encoding.

name: flat_zero_waitcnt

body: |
  ; GCN-LABEL: name: flat_zero_waitcnt
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAITCNT 0
  ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
  ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
  ; GCN-NEXT: S_WAITCNT 3953
  ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_WAITCNT 3952
  ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
  ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: S_WAITCNT 49279
  ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
  ; GCN-NEXT: S_WAITCNT 3952
  ; GCN-NEXT: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
  ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    S_ENDPGM 0
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
# The expected output has no wait at the end of bb.0; the wait appears in
# bb.1 directly before the store that reads $agpr0.


name: single_fallthrough_successor_no_end_block_wait

body: |
  ; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAITCNT 0
  ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
  ; GCN-NEXT: S_WAITCNT 112
  ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.
# bb.0 branches straight to bb.2. bb.1 is not listed as a successor of bb.0
# and is expected to get no wait; the wait appears in bb.2 directly before
# the store that reads $agpr0.




name: single_branch_successor_not_next_block

body: |
  ; GCN-LABEL: name: single_branch_successor_not_next_block
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAITCNT 0
  ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_ENDPGM 0
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
  ; GCN-NEXT: S_WAITCNT 112
  ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.2
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0

  bb.2:
    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# The input already has an S_WAITCNT 0 between the load and the store that
# reads $agpr0. The hand-written checks below require that wait to directly
# follow the load and that no further S_WAITCNT appears after it.
# GCN-LABEL: name: preexisting_waitcnt{{$}}
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: S_WAITCNT 0
# GCN-NOT: S_WAITCNT
name: preexisting_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 0
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# A bundle holding only S_NOPs sits between the load of $agpr0 and the store
# that reads it. The bundle is expected to stay unchanged, with the wait
# placed after it, directly before the store.
name: bundle_no_waitcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    ; GCN-LABEL: name: bundle_no_waitcnt
    ; GCN: liveins: $vgpr1_vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: BUNDLE {
    ; GCN-NEXT: S_NOP 0
    ; GCN-NEXT: S_NOP 0
    ; GCN-NEXT: }
    ; GCN-NEXT: S_WAITCNT 112
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_NOP 0
    }
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# The bundle between the load and the store already contains an S_WAITCNT 0.
# The pass should see the wait inside the bundle and not insert an extra one
# before the store.
name: preexisting_waitcnt_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    ; GCN-LABEL: name: preexisting_waitcnt_in_bundle
    ; GCN: liveins: $vgpr1_vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: BUNDLE {
    ; GCN-NEXT: S_NOP 0
    ; GCN-NEXT: S_WAITCNT 0
    ; GCN-NEXT: }
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE {
      S_NOP 0
      S_WAITCNT 0
    }
    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def and use inside bundle.
# The load of $agpr0 and the store that reads it share one bundle; the wait
# is expected inside that bundle, between the two instructions.

name: insert_in_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    ; GCN-LABEL: name: insert_in_bundle
    ; GCN: liveins: $vgpr1_vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_WAITCNT 112
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: }
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
      FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    }
...

---

# Def is last instruction in bundle, use is outside bundle.
# The wait is expected after the bundle closes, directly before the store
# that reads $agpr0.


name: exit_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    ; GCN-LABEL: name: exit_bundle
    ; GCN: liveins: $vgpr1_vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: }
    ; GCN-NEXT: S_WAITCNT 112
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    }

    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr

...

---

# Def is in bundle, use is in another bundle.
# The wait is expected between the two bundles, not inside either of them.


name: cross_bundle
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr1_vgpr2
    ; GCN-LABEL: name: cross_bundle
    ; GCN: liveins: $vgpr1_vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: }
    ; GCN-NEXT: S_WAITCNT 112
    ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: }
    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
      $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    }
    BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
      FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    }
...

---
# agpr should be disjoint and tracked separately from vgpr.
# The only pending load writes $vgpr226, while the two stores read $agpr1 and
# $agpr0. The expected output has no S_WAITCNT before either store; the only
# wait is the S_WAITCNT 0 at function entry.
# NOTE(review): $vgpr226 was presumably picked so that its tracking slot could
# alias an AGPR slot if the two register files shared an index space --
# confirm against the pass's register-interval numbering.

name: high_register_collision

body: |
  bb.0:
    ; GCN-LABEL: name: high_register_collision
    ; GCN: S_WAITCNT 0
    ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
    ; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0
    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
    $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
