; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Make sure fdiv is promoted to f32 on targets without f16 division
; (SI), and selects the f16 rcp/rsq/div_fixup forms on VI.

; GCN-LABEL: {{^}}v_fdiv_f16:
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_div_scale_f32
; SI-DAG: v_div_scale_f32
; SI-DAG: v_rcp_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_mul_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_div_fmas_f32
; SI: v_div_fixup_f32
; SI: v_cvt_f16_f32

; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]

; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rcp_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rcp_f16_abs:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], |[[VAL]]|
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_abs(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.abs
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rcp_f16_arcp:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rcp_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half -1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rsq_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rsq_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_rsq_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_sqrt_f16_e32 [[SQRT:v[0-9]+]], [[VAL]]
; VI-NEXT: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[SQRT]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half -1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_fdiv_f16_arcp:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; GCN-LABEL: {{^}}v_fdiv_f16_unsafe:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.sqrt.f16(half) #1
declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }