/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_ACL_VECT_H_
#define _RTE_ACL_VECT_H_

/**
 * @file
 *
 * RTE ACL SSE/AVX related header.
 *
 * Both macros below are generic over the SIMD flavour: the intrinsic names
 * are built by token pasting, i.e. an operation "op" is invoked as
 * _<P>_<op>(...) (for example P = mm expands to _mm_<op>).
 *
 * The type operator is spelled __typeof__ (not typeof) on purpose: the
 * plain spelling is not a keyword in strict ISO dialects (-std=c99,
 * -std=c11, -std=c++17, -fno-gnu-keywords), so using it would break every
 * expansion of these macros when built that way.
 */

#ifdef __cplusplus
extern "C" {
#endif


/*
 * Takes 2 SIMD registers containing N transitions each (tr0, tr1).
 * Shuffles them into a different representation:
 * lo - contains low 32 bits of given N transitions.
 * hi - contains high 32 bits of given N transitions.
 *
 * P  - intrinsic prefix, pasted as _<P>_shuffle_ps.
 * TC - type tr0/tr1 are cast to before being handed to the shuffle.
 * lo, hi - output lvalues; the shuffle result is cast back to their own type.
 *
 * Note: tr0 and tr1 are each evaluated twice.
 */
#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \
	/* selector 0x88: even 32-bit elements of each source */ \
	lo = (__typeof__(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \
	/* selector 0xdd: odd 32-bit elements of each source */ \
	hi = (__typeof__(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \
} while (0)


/*
 * Calculate the address of the next transition for
 * all types of nodes. Note that only DFA nodes and range
 * nodes actually transition to another node. Match
 * nodes are not supposed to be encountered here.
 * For quad range nodes:
 * Calculate number of range boundaries that are less than the
 * input value. Range boundaries for each node are in signed 8 bit,
 * ordered from -128 to 127.
 * This is effectively a count of the boundary bytes that the
 * input byte is greater than.
 * Single nodes are processed in the same way as quad range nodes.
 *
 * P, S          - intrinsic prefix and bit-width suffix; bitwise operations
 *                 are pasted as _<P>_<op>_si<S>, the rest as _<P>_<op>.
 * addr          - output lvalue; its type is also used for all temporaries.
 * index_mask    - mask selecting the node address bits of tr_lo; the
 *                 remaining bits of tr_lo are treated as the node type.
 * next_input    - input data, fed through a byte shuffle.
 * shuffle_input - byte-shuffle control applied to next_input.
 * ones_16       - multiplier for the final 16-bit multiply-add of the
 *                 range count (presumably all 16-bit lanes equal to 1,
 *                 as the name suggests - confirm against callers).
 * range_base    - added byte-wise to (input >> 30) to build the shuffle
 *                 control that picks a byte out of tr_hi for DFA nodes.
 * tr_lo, tr_hi  - low/high 32-bit halves of the transitions
 *                 (see ACL_TR_HILO).
 *
 * Macro hygiene: the expansion declares locals named in, node_type, r, t,
 * dfa_msk, dfa_ofs and quad_ofs, so arguments must not refer to caller
 * variables with those names. Several arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define ACL_TR_CALC_ADDR(P, S, \
	addr, index_mask, next_input, shuffle_input, \
	ones_16, range_base, tr_lo, tr_hi) do { \
	\
	__typeof__(addr) in, node_type, r, t; \
	__typeof__(addr) dfa_msk, dfa_ofs, quad_ofs; \
	\
	/* x ^ x: all-zero value, used as the DFA type(0) reference */ \
	t = _##P##_xor_si##S(index_mask, index_mask); \
	in = _##P##_shuffle_epi8(next_input, shuffle_input); \
	\
	/* Calc node type and node addr */ \
	node_type = _##P##_andnot_si##S(index_mask, tr_lo); \
	addr = _##P##_and_si##S(index_mask, tr_lo); \
	\
	/* mask for DFA type(0) nodes */ \
	dfa_msk = _##P##_cmpeq_epi32(node_type, t); \
	\
	/* DFA calculations. */ \
	/* top 2 bits of each 32-bit lane select a byte of tr_hi */ \
	r = _##P##_srli_epi32(in, 30); \
	r = _##P##_add_epi8(r, range_base); \
	/* top byte of each 32-bit lane */ \
	t = _##P##_srli_epi32(in, 24); \
	r = _##P##_shuffle_epi8(tr_hi, r); \
	\
	dfa_ofs = _##P##_sub_epi32(t, r); \
	\
	/* QUAD/SINGLE calculations. */ \
	/* flag every byte of tr_hi that the input byte is greater than */ \
	t = _##P##_cmpgt_epi8(in, tr_hi); \
	/* negate each flagged (negative) byte: 0xff becomes 1 */ \
	t = _##P##_sign_epi8(t, t); \
	/* sum adjacent bytes into 16-bit, then adjacent 16-bit into 32-bit */ \
	t = _##P##_maddubs_epi16(t, t); \
	quad_ofs = _##P##_madd_epi16(t, ones_16); \
	\
	/* blend DFA and QUAD/SINGLE. */ \
	t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \
	\
	/* calculate address for next transitions. */ \
	addr = _##P##_add_epi32(addr, t); \
} while (0)


#ifdef __cplusplus
}
#endif

#endif /* _RTE_ACL_VECT_H_ */