; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; DAGCombiner will transform:
; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFF))
; unless isFabsFree returns true

; fabs of a half produced by bitcasting an i16 argument. Both run lines
; expect the loaded value to be masked with 0x7fff and stored directly.
; GCN-LABEL: {{^}}fabs_free_f16:
; GCN: flat_load_ushort [[VAL:v[0-9]+]],
; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]

define void @fabs_free_f16(half addrspace(1)* %out, i16 %in) {
  %bc= bitcast i16 %in to half
  %fabs = call half @llvm.fabs.f16(half %bc)
  store half %fabs, half addrspace(1)* %out
  ret void
}

; fabs of a half argument. Only the first run line has checks for the body:
; the abs is expected as a source modifier on the f32 -> f16 conversion.
; GCN-LABEL: {{^}}fabs_f16:
; CI: flat_load_ushort [[VAL:v[0-9]+]],
; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]|
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fabs_f16(half addrspace(1)* %out, half %in) {
  %fabs = call half @llvm.fabs.f16(half %in)
  store half %fabs, half addrspace(1)* %out
  ret void
}

; fabs of a <2 x half> argument. The tonga checks expect each element to be
; masked separately with a shared 0x7fff register, then repacked with a
; shift/and/or sequence before a single dword store.
; FIXME: Should be able to use a single and
; GCN-LABEL: {{^}}fabs_v2f16:
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|

; VI: flat_load_ushort [[LO:v[0-9]+]]
; VI: flat_load_ushort [[HI:v[0-9]+]]
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[LO]]
; VI-DAG: v_and_b32_e32 [[FABS_HI:v[0-9]+]], [[MASK]], [[HI]]
; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
; VI: v_or_b32
; VI: flat_store_dword
define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
  store <2 x half> %fabs, <2 x half> addrspace(1)* %out
  ret void
}

; fabs of a <4 x half> argument: four abs-modified conversions on the first
; run line, four masked ANDs sharing one 0x7fff register on the tonga run
; line, and a dwordx2 store on both.
; GCN-LABEL: {{^}}fabs_v4f16:
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|

; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}

; GCN: flat_store_dwordx2
define void @fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
  %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
  store <4 x half> %fabs, <4 x half> addrspace(1)* %out
  ret void
}

; fabs feeding an fmul. The abs is expected to fold into a source modifier of
; the multiply; the tonga checks additionally require that no separate "and"
; instruction appears before the f16 multiply.
; GCN-LABEL: {{^}}fabs_fold_f16:
; GCN: flat_load_ushort [[IN0:v[0-9]+]]
; GCN: flat_load_ushort [[IN1:v[0-9]+]]
; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]]
; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]]
; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]

; VI-NOT: and
; VI: v_mul_f16_e64 [[RESULT:v[0-9]+]], |[[IN1]]|, [[IN0]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fabs_fold_f16(half addrspace(1)* %out, half %in0, half %in1) {
  %fabs = call half @llvm.fabs.f16(half %in0)
  %fmul = fmul half %fabs, %in1
  store half %fmul, half addrspace(1)* %out
  ret void
}

declare half @llvm.fabs.f16(half) readnone
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) readnone
declare <4 x half> @llvm.fabs.v4f16(<4 x half>) readnone