; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure to test with f32 and i32 compares. If we have to use float
; compares, we always have multiple condition registers. If we can do
; scalar compares, we don't want to use multiple condition registers.

; Layout of this file: every kernel below combines two compares of %a (against
; %b and against %c) with a single i1 'and' or 'or', uses the combined bit as
; the condition of one select between %x and %y, and stores the selected value
; to %out. The eight kernels cover {and, or} x {i32, f32 compare} x {i32, i64
; select}. The final kernel, @regression, is a separate control-flow case.

; 'and' of two i32 'icmp ne' results driving an i32 select. The checks expect
; each compare to be materialized into a 64-bit SGPR pair, the pairs to be
; combined into vcc, and the select result to be left untouched until it is
; stored.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; 'and' of two f32 'fcmp one' results driving an i32 select. The checks expect
; one compare to write vcc directly and the other to write an SGPR pair.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; 'and' of two i32 'icmp ne' results driving an i64 select. The checks expect
; two 32-bit conditional moves sharing the same vcc, stored as one dwordx2.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; 'and' of two f32 'fcmp one' results driving an i64 select.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; 'or' of two i32 'icmp ne' results driving an i32 select. Same shape as the
; 'and' variant above, with s_or_b64 as the combining instruction; this kernel
; additionally checks for the terminating s_endpgm.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; 'or' of two f32 'fcmp one' results driving an i32 select.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; 'or' of two i32 'icmp ne' results driving an i64 select.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN-DAG: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; 'or' of two f32 'fcmp one' results driving an i64 select.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; Control-flow case: two nested 'fcmp oeq' branches feed an i1 phi that selects
; between 4.0 and 0.0. The checks expect three float compares in this order:
; not-equal against 1.0, not-equal against 0, and equal against 0.
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0

define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry:
  %cmp0 = fcmp oeq float %c0, 1.0
  br i1 %cmp0, label %if0, label %endif

if0:
  %cmp1 = fcmp oeq float %c1, 0.0
  br i1 %cmp1, label %if1, label %endif

if1:
  ; Only reached when %cmp1 is true, so this is the inverted branch condition.
  %cmp2 = xor i1 %cmp1, true
  br label %endif

endif:
  ; Incoming values: true when %cmp0 failed, %cmp2 via %if1, false when %cmp1
  ; failed.
  %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
  %tmp2 = select i1 %tmp0, float 4.0, float 0.0
  store float %tmp2, float addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }