; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
; RUN: FileCheck %s

; Codegen test for the -amdgpu-set-wave-priority llc option. Each function
; below is compiled for amdgcn and the emitted assembly is inspected for the
; placement of s_setprio instructions relative to buffer loads and branches.

; The shader contains no loads; no s_setprio is expected anywhere in it.
; CHECK-LABEL: no_setprio:
; CHECK-NOT: s_setprio
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @no_setprio() {
  ret <2 x float> <float 0.0, float 0.0>
}

; Single-block shader with one buffer load: s_setprio 3 is expected before the
; load and s_setprio 0 on the line immediately after it.
; CHECK-LABEL: vmem_in_exit_block:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @vmem_in_exit_block(<4 x i32> inreg %p) {
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Only block %b performs a load. s_setprio 3 is expected ahead of the
; conditional branch, s_setprio 0 right after the load in %b, and s_setprio 0
; as the first instruction of the load-free block %a.
; CHECK-LABEL: branch:
; CHECK: s_setprio 3
; CHECK: s_cbranch_scc0 [[A:.*]]
; CHECK: {{.*}}: ; %b
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[A]]: ; %a
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK-NEXT: [[EXIT]]:
define amdgpu_ps <2 x float> @branch(<4 x i32> inreg %p, i32 inreg %i) {
  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> <float 0.0, float 0.0>

b:
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Loads occur in %entry and in %a; %c is reachable from both. s_setprio 0 is
; expected right after the load in %a and at the top of %c, while %b (reached
; only through %a) must not contain any s_setprio.
; CHECK-LABEL: setprio_follows_setprio:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK: s_cbranch_scc1 [[C:.*]]
; CHECK: {{.*}}: ; %a
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_cbranch_scc1 [[C]]
; CHECK: {{.*}}: ; %b
; CHECK-NOT: s_setprio
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[C]]: ; %c
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK: [[EXIT]]:
define amdgpu_ps <2 x float> @setprio_follows_setprio(<4 x i32> inreg %p, i32 inreg %i) {
entry:
  %v1 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %cond1 = icmp ne i32 %i, 0
  br i1 %cond1, label %a, label %c

a:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
  %cond2 = icmp ne i32 %i, 1
  br i1 %cond2, label %b, label %c

b:
  ret <2 x float> %v2

c:
  %v3 = phi <2 x float> [%v1, %entry], [%v2, %a]
  %v4 = fadd <2 x float> %v1, %v3
  ret <2 x float> %v4
}

; The load sits inside a loop. s_setprio 3 is expected once in %entry, no
; s_setprio anywhere inside the loop body, and s_setprio 0 as the first
; instruction of %exit.
; CHECK-LABEL: loop:
; CHECK: {{.*}}: ; %entry
; CHECK: s_setprio 3
; CHECK-NOT: s_setprio
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK-NEXT: {{.*}}: ; %exit
; CHECK-NEXT: s_setprio 0
define amdgpu_ps <2 x float> @loop(<4 x i32> inreg %p) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]

  %i2 = add i32 %i, 1

  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 %i, i32 0, i32 0, i32 0)
  %sum2 = fadd <2 x float> %sum, %v

  %cond = icmp ult i32 %i2, 5
  br i1 %cond, label %loop, label %exit

exit:
  ret <2 x float> %sum2
}

; %entry loads and then branches either to a load-free loop or to
; %another_load. s_setprio 0 is expected at the top of %loop.preheader (a block
; that appears in the output but not in this IR), with no s_setprio inside
; %loop or %exit, and s_setprio 0 right after the load in %another_load.
; CHECK-LABEL: edge_split:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
; CHECK: {{.*}}: ; %loop.preheader
; CHECK-NEXT: s_setprio 0
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK: {{.*}}: ; %exit
; CHECK-NOT: s_setprio
; CHECK: s_branch [[RET:.*]]
; CHECK: [[ANOTHER_LOAD]]: ; %another_load
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[RET]]
; CHECK: [[RET]]:
define amdgpu_ps <2 x float> @edge_split(<4 x i32> inreg %p, i32 inreg %x) {
entry:
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %cond = icmp ne i32 %x, 0
  br i1 %cond, label %loop, label %another_load

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]

  %i2 = add i32 %i, 1
  %mul2 = fmul <2 x float> %mul, %v

  %cond2 = icmp ult i32 %i2, 5
  br i1 %cond2, label %loop, label %exit

exit:
  ret <2 x float> %mul2

another_load:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
  %sum = fadd <2 x float> %v, %v2
  ret <2 x float> %sum
}

; Buffer-load intrinsic used by every load in the tests above.
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) nounwind