# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s

# Checks where the si-insert-waitcnts pass places S_WAITCNT (vmcnt) relative
# to loops: in the loop preheader (hoisted) or inside the loop body.

---

# The loop contains a store and a use of a value loaded outside of the loop.
# We expect the waitcnt for the use to be hoisted on GFX9, but not on GFX10+
# because we have the vscnt counter.

# GFX9-LABEL: waitcnt_vm_loop
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as before, but the loop preheader has no terminator.

# GFX9-LABEL: waitcnt_vm_loop_noterm
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_noterm
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop_noterm
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as before but there is a preexisting waitcnt in the preheader.

# GFX9-LABEL: waitcnt_vm_loop_noterm_wait
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:
name: waitcnt_vm_loop_noterm_wait
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_WAITCNT 3952

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a store, a load, and uses values loaded both inside and
# outside the loop.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop_load
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_load
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop_load
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr7 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr7, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a use of a value loaded outside of the loop, and no store
# nor load.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop_no_store
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_no_store
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop_no_store
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a store, no load, and doesn't use any value loaded inside
# or outside of the loop. There is only one use of the loaded value in the
# exit block.
# We don't expect any s_waitcnt vmcnt in the loop body or preheader, but expect
# one in the exit block.


# GFX9-LABEL: waitcnt_vm_loop_no_use
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_no_use
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
# NOTE(review): the checks above only verify the absence of a waitcnt in bb.0
# and bb.1; nothing is checked after the bb.2 (exit block) label.
name: waitcnt_vm_loop_no_use
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_ENDPGM 0

...
---

# The loop loads a value that is not used in the loop, and uses a value loaded
# outside of the loop.
# We expect the waitcnt to be hoisted out of the loop to wait a single time
# before the loop is executed and avoid waiting for the load to complete on
# each iteration.

# GFX9-LABEL: waitcnt_vm_loop2
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop2
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as before with an additional store in the loop. We still expect the
# waitcnt instructions to be hoisted.

# GFX9-LABEL: waitcnt_vm_loop2_store
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_store
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop2_store
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as loop2 but the value loaded inside the loop is also used in the loop.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop2_use_in_loop
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_use_in_loop
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop2_use_in_loop
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr4 = V_ADD_U32_e32 $vgpr5, $vgpr1, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a use of a value loaded outside of the loop, but we already
# waited for that load to complete. The loop also loads a value that is not used
# in the loop. We do not expect any waitcnt in the loop.

# GFX9-LABEL: waitcnt_vm_loop2_nowait
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.3:

# GFX10-LABEL: waitcnt_vm_loop2_nowait
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.3:
name: waitcnt_vm_loop2_nowait
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec

    S_BRANCH %bb.2

  bb.2:
    successors: %bb.2, %bb.3

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...
---

# Similar test case but for register intervals (multi-register operands). The
# value loaded inside the loop is not used in the loop, so we expect the
# waitcnt to be hoisted.

# GFX9-LABEL: waitcnt_vm_loop2_reginterval
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_reginterval
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop2_reginterval
body: |
  bb.0:
    successors: %bb.1

    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec

    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr10 = COPY $vgpr0

    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Similar test case but for register intervals. Here one register of the
# interval loaded inside the loop is also used in the loop, so we do not expect
# the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop2_reginterval2
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_reginterval2
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name: waitcnt_vm_loop2_reginterval2
body: |
  bb.0:
    successors: %bb.1

    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec

    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr10 = COPY $vgpr0

    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    $vgpr11 = COPY $vgpr7
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop loads a value that is not used in the loop, but uses a value loaded
# outside of it. We expect the s_waitcnt instruction to be hoisted.
# An s_waitcnt vmcnt(0) is generated to flush in the preheader, but for this
# specific test case, it would be better to use vmcnt(1) instead. This is
# currently not implemented.

# GFX9-LABEL: waitcnt_vm_zero
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 3952
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_zero
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16240
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16240
# GFX10-LABEL: bb.2:

name: waitcnt_vm_zero
body: |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr3, implicit $exec
    $vgpr2 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...