; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN

; Both functions below share the same shape:
;   * %src is a phi of the constants 0 and 1, selected by whether element 0 of
;     %LocalInvocationId is zero.
;   * One side of a second branch feeds %src through
;     set.inactive -> update.dpp -> strict.wwm and uses the result as the
;     offset operand of a raw buffer store.
;   * %src is also used outside that whole-wave sequence, so its register must
;     still hold the original value there.
; NOTE(review): presumably the point of the test is that the set.inactive
; expansion writes a copy of %src rather than %src itself -- confirm against
; the commit that introduced this file.

; if-then form: the whole-wave sequence is in %.then, and %src is read again in
; %.end through the phi %idx (0 when coming from %.then, %src when coming from
; %.merge).
define amdgpu_cs void @if_then(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_then:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT:  ; %bb.1: ; %.bb0
; GCN-NEXT:    v_mov_b32_e32 v3, 1
; GCN-NEXT:  ; %bb.2: ; %.merge
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT:    s_cbranch_execz .LBB0_4
; GCN-NEXT:  ; %bb.3: ; %.then
; GCN-NEXT:    v_mov_b32_e32 v1, v3
; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
; GCN-NEXT:    s_or_saveexec_b32 s1, -1
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_dpp v2, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GCN-NEXT:    s_mov_b32 exec_lo, s1
; GCN-NEXT:    v_mov_b32_e32 v0, v2
; GCN-NEXT:    v_mov_b32_e32 v4, -1
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    buffer_store_dword v4, v0, s[4:7], 0 offen
; GCN-NEXT:  .LBB0_4: ; %.end
; GCN-NEXT:    s_waitcnt_depctr 0xffe3
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT:    v_mov_b32_e32 v0, -1
; GCN-NEXT:    buffer_store_dword v0, v3, s[4:7], 0 offen
; GCN-NEXT:    s_endpgm
.entry:
  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
  br i1 %.not10002, label %.merge, label %.bb0

.bb0:
  br label %.merge

.merge:
  ; %src is 0 when arriving straight from %.entry and 1 when arriving via %.bb0.
  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
  br i1 %i530, label %.end, label %.then

.then:
  ; set.inactive takes %src and the constant 0 as its second operand; the
  ; expected assembly brackets the write of 0 with a pair of s_not_b32 on exec_lo.
  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
  ; Control operand 273 with row/bank masks 15 is printed in the expected
  ; assembly as "row_shr:1 row_mask:0xf bank_mask:0xf".
  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
  br label %.end

.end:
  ; Second use of %src (on the %.merge edge), outside the whole-wave sequence.
  %idx = phi i32 [ 0, %.then ], [ %src, %.merge ]
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %idx, i32 0, i32 0)
  ret void
}


; if-else form: the whole-wave sequence is in %.else, while %.then stores
; using %src directly as the offset operand. %.end only returns.
; NOTE(review): the "vgpr_opt" suffix suggests this variant targets a VGPR
; live-range optimization across the if/else -- confirm which pass is meant.
define amdgpu_cs void @if_else_vgpr_opt(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_else_vgpr_opt:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT:  ; %bb.1: ; %.bb0
; GCN-NEXT:    v_mov_b32_e32 v3, 1
; GCN-NEXT:  ; %bb.2: ; %.merge
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT:    s_xor_b32 s0, exec_lo, s0
; GCN-NEXT:    s_cbranch_execnz .LBB1_5
; GCN-NEXT:  ; %bb.3: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b32 s0, s0
; GCN-NEXT:    s_cbranch_execnz .LBB1_6
; GCN-NEXT:  .LBB1_4: ; %.end
; GCN-NEXT:    s_endpgm
; GCN-NEXT:  .LBB1_5: ; %.else
; GCN-NEXT:    s_or_saveexec_b32 s1, -1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 exec_lo, s1
; GCN-NEXT:    v_mov_b32_e32 v2, v3
; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
; GCN-NEXT:    s_or_saveexec_b32 s1, -1
; GCN-NEXT:    v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GCN-NEXT:    s_mov_b32 exec_lo, s1
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    v_mov_b32_e32 v3, -1
; GCN-NEXT:    buffer_store_dword v3, v0, s[4:7], 0 offen
; GCN-NEXT:    ; implicit-def: $vgpr3
; GCN-NEXT:    s_andn2_saveexec_b32 s0, s0
; GCN-NEXT:    s_cbranch_execz .LBB1_4
; GCN-NEXT:  .LBB1_6: ; %.then
; GCN-NEXT:    v_mov_b32_e32 v0, -1
; GCN-NEXT:    buffer_store_dword v0, v3, s[4:7], 0 offen
; GCN-NEXT:    s_endpgm
.entry:
  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
  br i1 %.not10002, label %.merge, label %.bb0

.bb0:
  br label %.merge

.merge:
  ; %src is 0 when arriving straight from %.entry and 1 when arriving via %.bb0.
  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
  br i1 %i530, label %.then, label %.else

.then:
  ; Plain use of %src as the store offset operand.
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %src, i32 0, i32 0)
  br label %.end

.else:
  ; Same set.inactive -> update.dpp -> strict.wwm chain as in @if_then.
  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
  br label %.end

.end:
  ret void
}

; Intrinsics used by the tests above.
declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32 immarg, i32 immarg) #2

attributes #0 = { convergent nounwind readnone willreturn }
attributes #1 = { convergent nounwind readnone speculatable willreturn }
attributes #2 = { nounwind willreturn writeonly }
