1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #ifndef _DIST_PRIV_H_ 6 #define _DIST_PRIV_H_ 7 8 /** 9 * @file 10 * RTE distributor 11 * 12 * The distributor is a component which is designed to pass packets 13 * one-at-a-time to workers, with dynamic load balancing. 14 */ 15 16 #ifdef __cplusplus 17 extern "C" { 18 #endif 19 20 #define NO_FLAGS 0 21 #define RTE_DISTRIB_PREFIX "DT_" 22 23 /* 24 * We will use the bottom four bits of pointer for flags, shifting out 25 * the top four bits to make room (since a 64-bit pointer actually only uses 26 * 48 bits). An arithmetic-right-shift will then appropriately restore the 27 * original pointer value with proper sign extension into the top bits. 28 */ 29 #define RTE_DISTRIB_FLAG_BITS 4 30 #define RTE_DISTRIB_FLAGS_MASK (0x0F) 31 #define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ 32 #define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ 33 #define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ 34 #define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */ 35 36 #define RTE_DISTRIB_BACKLOG_SIZE 8 37 #define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) 38 39 #define RTE_DISTRIB_MAX_RETURNS 128 40 #define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) 41 42 /** 43 * Maximum number of workers allowed. 44 * Be aware of increasing the limit, because it is limited by how we track 45 * in-flight tags. See in_flight_bitmask and rte_distributor_process 46 */ 47 #define RTE_DISTRIB_MAX_WORKERS 64 48 49 #define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ 50 51 /** 52 * Buffer structure used to pass the pointer data between cores. This is cache 53 * line aligned, but to improve performance and prevent adjacent cache-line 54 * prefetches of buffers for other workers, e.g. when worker 1's buffer is on 55 * the next cache line to worker 0, we pad this out to three cache lines. 56 * Only 64-bits of the memory is actually used though. 57 */ 58 union rte_distributor_buffer_single { 59 volatile int64_t bufptr64; 60 char pad[RTE_CACHE_LINE_SIZE*3]; 61 } __rte_cache_aligned; 62 63 /* 64 * Transfer up to 8 mbufs at a time to/from workers, and 65 * flow matching algorithm optimized for 8 flow IDs at a time 66 */ 67 #define RTE_DIST_BURST_SIZE 8 68 69 struct rte_distributor_backlog { 70 unsigned int start; 71 unsigned int count; 72 int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned; 73 uint16_t *tags; /* will point to second cacheline of inflights */ 74 } __rte_cache_aligned; 75 76 77 struct rte_distributor_returned_pkts { 78 unsigned int start; 79 unsigned int count; 80 struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; 81 }; 82 83 struct rte_distributor_single { 84 TAILQ_ENTRY(rte_distributor_single) next; /**< Next in list. */ 85 86 char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ 87 unsigned int num_workers; /**< Number of workers polling */ 88 89 uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS]; 90 /**< Tracks the tag being processed per core */ 91 uint64_t in_flight_bitmask; 92 /**< on/off bits for in-flight tags. 93 * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then 94 * the bitmask has to expand. 95 */ 96 97 struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; 98 99 union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS]; 100 101 struct rte_distributor_returned_pkts returns; 102 }; 103 104 /* All different signature compare functions */ 105 enum rte_distributor_match_function { 106 RTE_DIST_MATCH_SCALAR = 0, 107 RTE_DIST_MATCH_VECTOR, 108 RTE_DIST_NUM_MATCH_FNS 109 }; 110 111 /** 112 * Buffer structure used to pass the pointer data between cores. This is cache 113 * line aligned, but to improve performance and prevent adjacent cache-line 114 * prefetches of buffers for other workers, e.g. when worker 1's buffer is on 115 * the next cache line to worker 0, we pad this out to two cache lines. 116 * We can pass up to 8 mbufs at a time in one cacheline. 117 * There is a separate cacheline for returns in the burst API. 118 */ 119 struct rte_distributor_buffer { 120 volatile int64_t bufptr64[RTE_DIST_BURST_SIZE] 121 __rte_cache_aligned; /* <= outgoing to worker */ 122 123 int64_t pad1 __rte_cache_aligned; /* <= one cache line */ 124 125 volatile int64_t retptr64[RTE_DIST_BURST_SIZE] 126 __rte_cache_aligned; /* <= incoming from worker */ 127 128 int64_t pad2 __rte_cache_aligned; /* <= one cache line */ 129 130 int count __rte_cache_aligned; /* <= number of current mbufs */ 131 }; 132 133 struct rte_distributor { 134 TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */ 135 136 char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ 137 unsigned int num_workers; /**< Number of workers polling */ 138 unsigned int alg_type; /**< Number of alg types */ 139 140 /**> 141 * First cache line in the this array are the tags inflight 142 * on the worker core. Second cache line are the backlog 143 * that are going to go to the worker core. 144 */ 145 uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2] 146 __rte_cache_aligned; 147 148 struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS] 149 __rte_cache_aligned; 150 151 struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; 152 153 struct rte_distributor_returned_pkts returns; 154 155 enum rte_distributor_match_function dist_match_fn; 156 157 struct rte_distributor_single *d_single; 158 159 uint8_t active[RTE_DISTRIB_MAX_WORKERS]; 160 uint8_t activesum; 161 }; 162 163 void 164 find_match_scalar(struct rte_distributor *d, 165 uint16_t *data_ptr, 166 uint16_t *output_ptr); 167 168 void 169 find_match_vec(struct rte_distributor *d, 170 uint16_t *data_ptr, 171 uint16_t *output_ptr); 172 173 #ifdef __cplusplus 174 } 175 #endif 176 177 #endif /* _DIST_PRIV_H_ */ 178