; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Instruction-selection test for the llvm.amdgcn.class.f16 intrinsic when
; compiling for -mcpu=fiji. Each function below calls the intrinsic, sign-extends
; the i1 result to i32 and stores it through %r; the check lines describe the
; v_cmp_class_f16 / v_cndmask_b32 sequence expected in the generated assembly.
;
; Every label pattern is terminated with ':' because class_f16 is a prefix of
; all the other function names in this file (and class_f16_fabs is a prefix of
; class_f16_fabs_fneg). Without the terminator a label could bind to a later
; function if the intended one were missing from the output.

declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)

; Value and mask are both loaded through addrspace(1) pointers. The checks
; expect the e32 form of the compare, writing vcc, with two VGPR operands.
; GCN-LABEL: {{^}}class_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_I32:[0-9]+]]
; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @class_f16(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    i32 addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load i32, i32 addrspace(1)* %b
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Value and mask are passed directly as function arguments, and the value goes
; through llvm.fabs.f16 first. The checks expect the fabs to appear as the
; |...| source modifier on the e64 compare rather than as its own instruction.
; GCN-LABEL: {{^}}class_f16_fabs:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |v[[VA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_fabs(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Same shape as class_f16_fabs, but the value is negated (fsub from -0.0).
; The checks expect the negation as the '-' source modifier on the compare.
; GCN-LABEL: {{^}}class_f16_fneg:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[VA_F16]], s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fneg = fsub half -0.0, %a.val
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; fabs followed by negation. The checks expect both to be combined into the
; -|...| source modifier on the compare.
; GCN-LABEL: {{^}}class_f16_fabs_fneg:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|v[[VA_F16]]|, s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_fabs_fneg(
    i32 addrspace(1)* %r,
    half %a.val,
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Constant mask 1. The checks expect the literal 1 as the last operand of the
; compare, with nothing following it on the line.
; GCN-LABEL: {{^}}class_f16_1:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 1{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_1(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Constant mask 64. As with class_f16_1, the checks expect the literal to be
; used directly as the last operand of the compare.
; GCN-LABEL: {{^}}class_f16_64:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], 64{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_64(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Constant mask 1023 (0x3ff). Unlike the 1 and 64 cases, the checks expect the
; mask to be moved into a VGPR with v_mov_b32 and the e32 compare to be used.
; GCN-LABEL: {{^}}class_f16_full_mask:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_full_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Constant mask 511 (0x1ff). Same expected sequence as class_f16_full_mask,
; with 0x1ff as the value moved into the mask register.
; GCN-LABEL: {{^}}class_f16_nine_bit_mask:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
; VI: v_cmp_class_f16_e32 vcc, v[[VA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define void @class_f16_nine_bit_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}