# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s

--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }


  define void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
  main_body:
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %cc = icmp eq i32 %id, 0
    %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
    %1 = extractvalue { i1, i64 } %0, 0
    %2 = extractvalue { i1, i64 } %0, 1
    store i32 %id, i32 addrspace(1)* undef
    br i1 %1, label %if, label %end

  if:                                               ; preds = %main_body
    %v.if = load volatile i32, i32 addrspace(1)* undef
    br label %end

  end:                                              ; preds = %if, %main_body
    %r = phi i32 [ 4, %main_body ], [ %v.if, %if ]
    call void @llvm.amdgcn.end.cf(i64 %2)
    store i32 %r, i32 addrspace(1)* undef
    ret void
  }

  define void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  define void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
  main_body:
    br i1 undef, label %if, label %end

  if:
    br label %end

  end:
    ret void
  }

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #1

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)


  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }

...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec_xor
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_and_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: %sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
# CHECK-NEXT: SI_MASK_BRANCH

name: optimize_if_or_saveexec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH
name: optimize_if_and_saveexec_xor_valu_middle
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1

    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}}
# CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY %sgpr0_sgpr1
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec
name: optimize_if_and_saveexec_xor_wrong_reg
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr6 = S_MOV_B32 -1
    %sgpr7 = S_MOV_B32 61440
    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term %sgpr0_sgpr1
    SI_MASK_BRANCH %bb.2.end, implicit %exec
    S_BRANCH %bb.1.if

  bb.1.if:
    successors: %bb.2.end
    liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7
    %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`)

  bb.2.end:
    liveins: %vgpr0, %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7

    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`)
    S_ENDPGM

...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}}
# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
# CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3
# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec

name: optimize_if_and_saveexec_xor_modify_copy_to_exec
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '%vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0.main_body:
    successors: %bb.1.if, %bb.2.end
    liveins: %vgpr0

    %sgpr0_sgpr1 = COPY %exec
    %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec
    %vgpr0 = V_MOV_B32_e32 4, implicit %exec
    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
    %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc
    %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc
    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
    SI_MASK_BRANCH %bb.2.end, implicit %exec
494 S_BRANCH %bb.1.if 495 496 bb.1.if: 497 successors: %bb.2.end 498 liveins: %sgpr0_sgpr1 499 500 %sgpr7 = S_MOV_B32 61440 501 %sgpr6 = S_MOV_B32 -1 502 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) 503 504 bb.2.end: 505 liveins: %vgpr0, %sgpr0_sgpr1 506 507 %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc 508 %sgpr0 = S_MOV_B32 0 509 %sgpr1 = S_MOV_B32 1 510 %sgpr2 = S_MOV_B32 -1 511 %sgpr3 = S_MOV_B32 61440 512 BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) 513 S_ENDPGM 514 515... 516--- 517# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}} 518# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc 519# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc 520# CHECK-NEXT: %exec = COPY %sgpr2_sgpr3 521# CHECK-NEXT: SI_MASK_BRANCH 522name: optimize_if_and_saveexec_xor_live_out_setexec 523alignment: 0 524exposesReturnsTwice: false 525legalized: false 526regBankSelected: false 527selected: false 528tracksRegLiveness: true 529liveins: 530 - { reg: '%vgpr0' } 531frameInfo: 532 isFrameAddressTaken: false 533 isReturnAddressTaken: false 534 hasStackMap: false 535 hasPatchPoint: false 536 stackSize: 0 537 offsetAdjustment: 0 538 maxAlignment: 0 539 adjustsStack: false 540 hasCalls: false 541 maxCallFrameSize: 0 542 hasOpaqueSPAdjustment: false 543 hasVAStart: false 544 hasMustTailInVarArgFunc: false 545body: | 546 bb.0.main_body: 547 successors: %bb.1.if, %bb.2.end 548 liveins: %vgpr0 549 550 %sgpr0_sgpr1 = COPY %exec 551 %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec 552 %vgpr0 = V_MOV_B32_e32 4, implicit %exec 553 %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc 554 %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc 555 %exec = 
S_MOV_B64_term %sgpr2_sgpr3 556 SI_MASK_BRANCH %bb.2.end, implicit %exec 557 S_BRANCH %bb.1.if 558 559 bb.1.if: 560 successors: %bb.2.end 561 liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 562 S_SLEEP 0, implicit %sgpr2_sgpr3 563 %sgpr7 = S_MOV_B32 61440 564 %sgpr6 = S_MOV_B32 -1 565 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) 566 567 bb.2.end: 568 liveins: %vgpr0, %sgpr0_sgpr1 569 570 %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc 571 %sgpr3 = S_MOV_B32 61440 572 %sgpr2 = S_MOV_B32 -1 573 BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) 574 S_ENDPGM 575 576... 577 578# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}} 579# CHECK: %sgpr0_sgpr1 = COPY %exec 580# CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc 581# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 582# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec 583 584name: optimize_if_unknown_saveexec 585alignment: 0 586exposesReturnsTwice: false 587legalized: false 588regBankSelected: false 589selected: false 590tracksRegLiveness: true 591liveins: 592 - { reg: '%vgpr0' } 593frameInfo: 594 isFrameAddressTaken: false 595 isReturnAddressTaken: false 596 hasStackMap: false 597 hasPatchPoint: false 598 stackSize: 0 599 offsetAdjustment: 0 600 maxAlignment: 0 601 adjustsStack: false 602 hasCalls: false 603 maxCallFrameSize: 0 604 hasOpaqueSPAdjustment: false 605 hasVAStart: false 606 hasMustTailInVarArgFunc: false 607body: | 608 bb.0.main_body: 609 successors: %bb.1.if, %bb.2.end 610 liveins: %vgpr0 611 612 %sgpr0_sgpr1 = COPY %exec 613 %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec 614 %vgpr0 = V_MOV_B32_e32 4, implicit %exec 615 %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc 616 %exec = S_MOV_B64_term killed %sgpr2_sgpr3 617 SI_MASK_BRANCH %bb.2.end, 
implicit %exec 618 S_BRANCH %bb.1.if 619 620 bb.1.if: 621 successors: %bb.2.end 622 liveins: %sgpr0_sgpr1 623 624 %sgpr7 = S_MOV_B32 61440 625 %sgpr6 = S_MOV_B32 -1 626 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) 627 628 bb.2.end: 629 liveins: %vgpr0, %sgpr0_sgpr1 630 631 %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc 632 %sgpr3 = S_MOV_B32 61440 633 %sgpr2 = S_MOV_B32 -1 634 BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) 635 S_ENDPGM 636 637... 638--- 639# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}} 640# CHECK: %sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec 641# CHECK-NEXT: SI_MASK_BRANCH 642 643name: optimize_if_andn2_saveexec 644alignment: 0 645exposesReturnsTwice: false 646legalized: false 647regBankSelected: false 648selected: false 649tracksRegLiveness: true 650liveins: 651 - { reg: '%vgpr0' } 652frameInfo: 653 isFrameAddressTaken: false 654 isReturnAddressTaken: false 655 hasStackMap: false 656 hasPatchPoint: false 657 stackSize: 0 658 offsetAdjustment: 0 659 maxAlignment: 0 660 adjustsStack: false 661 hasCalls: false 662 maxCallFrameSize: 0 663 hasOpaqueSPAdjustment: false 664 hasVAStart: false 665 hasMustTailInVarArgFunc: false 666body: | 667 bb.0.main_body: 668 successors: %bb.1.if, %bb.2.end 669 liveins: %vgpr0 670 671 %sgpr0_sgpr1 = COPY %exec 672 %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec 673 %vgpr0 = V_MOV_B32_e32 4, implicit %exec 674 %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc 675 %exec = S_MOV_B64_term killed %sgpr2_sgpr3 676 SI_MASK_BRANCH %bb.2.end, implicit %exec 677 S_BRANCH %bb.1.if 678 679 bb.1.if: 680 successors: %bb.2.end 681 liveins: %sgpr0_sgpr1 682 683 %sgpr7 = S_MOV_B32 61440 684 %sgpr6 = S_MOV_B32 -1 685 %vgpr0 = 
BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) 686 687 bb.2.end: 688 liveins: %vgpr0, %sgpr0_sgpr1 689 690 %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc 691 %sgpr3 = S_MOV_B32 61440 692 %sgpr2 = S_MOV_B32 -1 693 BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) 694 S_ENDPGM 695 696... 697--- 698# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} 699# CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc 700# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 701# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec 702name: optimize_if_andn2_saveexec_no_commute 703alignment: 0 704exposesReturnsTwice: false 705legalized: false 706regBankSelected: false 707selected: false 708tracksRegLiveness: true 709liveins: 710 - { reg: '%vgpr0' } 711frameInfo: 712 isFrameAddressTaken: false 713 isReturnAddressTaken: false 714 hasStackMap: false 715 hasPatchPoint: false 716 stackSize: 0 717 offsetAdjustment: 0 718 maxAlignment: 0 719 adjustsStack: false 720 hasCalls: false 721 maxCallFrameSize: 0 722 hasOpaqueSPAdjustment: false 723 hasVAStart: false 724 hasMustTailInVarArgFunc: false 725body: | 726 bb.0.main_body: 727 successors: %bb.1.if, %bb.2.end 728 liveins: %vgpr0 729 730 %sgpr0_sgpr1 = COPY %exec 731 %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec 732 %vgpr0 = V_MOV_B32_e32 4, implicit %exec 733 %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc 734 %exec = S_MOV_B64_term killed %sgpr2_sgpr3 735 SI_MASK_BRANCH %bb.2.end, implicit %exec 736 S_BRANCH %bb.1.if 737 738 bb.1.if: 739 successors: %bb.2.end 740 liveins: %sgpr0_sgpr1 741 742 %sgpr7 = S_MOV_B32 61440 743 %sgpr6 = S_MOV_B32 -1 744 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* 
undef`) 745 746 bb.2.end: 747 liveins: %vgpr0, %sgpr0_sgpr1 748 749 %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc 750 %sgpr3 = S_MOV_B32 61440 751 %sgpr2 = S_MOV_B32 -1 752 BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) 753 S_ENDPGM 754 755... 756