/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#ifndef _DIST_PRIV_H_
#define _DIST_PRIV_H_

/**
 * @file
 * RTE distributor
 *
 * The distributor is a component which is designed to pass packets
 * one-at-a-time to workers, with dynamic load balancing.
 */

/* Convenience zero value passed where an API expects a flags argument. */
#define NO_FLAGS 0
/* Prefix used when registering distributor instances by name. */
#define RTE_DISTRIB_PREFIX "DT_"

/*
 * We will use the bottom four bits of the pointer for flags, shifting out
 * the top four bits to make room (since a 64-bit pointer actually only uses
 * 48 bits). An arithmetic right shift will then appropriately restore the
 * original pointer value with proper sign extension into the top bits.
 */
#define RTE_DISTRIB_FLAG_BITS 4
#define RTE_DISTRIB_FLAGS_MASK (0x0F) /**< low-4-bit flag field in bufptr64 */
#define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */
#define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */
#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */
#define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */

/* Per-worker backlog ring size; must stay a power of two so the
 * companion mask below can be used for cheap index wrap-around.
 */
#define RTE_DISTRIB_BACKLOG_SIZE 8
#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1)

/* Capacity of the returned-packets ring; power of two for the same
 * mask-based wrap-around trick as the backlog above.
 */
#define RTE_DISTRIB_MAX_RETURNS 128
#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1)

/**
 * Maximum number of workers allowed.
 * Be aware of increasing the limit, because it is limited by how we track
 * in-flight tags. See in_flight_bitmask and rte_distributor_process
 */
#define RTE_DISTRIB_MAX_WORKERS 64

#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */

/**
 * Buffer structure used to pass the pointer data between cores. This is cache
 * line aligned, but to improve performance and prevent adjacent cache-line
 * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
 * the next cache line to worker 0, we pad this out to three cache lines.
 * Only 64 bits of the memory is actually used though.
 */
union rte_distributor_buffer_single {
	volatile int64_t bufptr64;          /**< single in-flight slot: mbuf ptr (shifted) + flag bits */
	char pad[RTE_CACHE_LINE_SIZE*3];    /**< pad to 3 cache lines to avoid adjacent-line prefetch */
} __rte_cache_aligned;

/*
 * Transfer up to 8 mbufs at a time to/from workers, and
 * flow matching algorithm optimized for 8 flow IDs at a time
 */
#define RTE_DIST_BURST_SIZE 8

/**
 * Per-worker queue of packets waiting to be handed to that worker
 * (packets whose tag is already in flight on the worker).
 */
struct rte_distributor_backlog {
	unsigned int start;                 /**< ring read index */
	unsigned int count;                 /**< number of queued entries */
	int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned; /**< queued bufptr64 values */
	uint16_t *tags; /* will point to second cacheline of inflights */
} __rte_cache_aligned;


/**
 * Ring of mbufs handed back by workers, drained by
 * the distributor's returned-packets API.
 */
struct rte_distributor_returned_pkts {
	unsigned int start;                 /**< ring read index */
	unsigned int count;                 /**< number of stored mbufs */
	struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS];
};

/**
 * Internal state for the legacy single-packet-at-a-time distributor.
 */
struct rte_distributor_single {
	TAILQ_ENTRY(rte_distributor_single) next;    /**< Next in list. */

	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the ring. */
	unsigned int num_workers;             /**< Number of workers polling */

	uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
	/**< Tracks the tag being processed per core */
	uint64_t in_flight_bitmask;
	/**< on/off bits for in-flight tags.
	 * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
	 * the bitmask has to expand.
	 */

	struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS];

	union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS];

	struct rte_distributor_returned_pkts returns;
};

/* All different signature compare functions */
enum rte_distributor_match_function {
	RTE_DIST_MATCH_SCALAR = 0,    /**< portable scalar flow-ID matching */
	RTE_DIST_MATCH_VECTOR,        /**< SIMD-accelerated flow-ID matching */
	RTE_DIST_NUM_MATCH_FNS
};

/**
 * Buffer structure used to pass the pointer data between cores. This is cache
 * line aligned, but to improve performance and prevent adjacent cache-line
 * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
 * the next cache line to worker 0, we pad this out to two cache lines.
 * We can pass up to 8 mbufs at a time in one cacheline.
 * There is a separate cacheline for returns in the burst API.
 */
struct rte_distributor_buffer {
	volatile int64_t bufptr64[RTE_DIST_BURST_SIZE]
		__rte_cache_aligned; /* <= outgoing to worker */

	int64_t pad1 __rte_cache_aligned;    /* <= one cache line */

	volatile int64_t retptr64[RTE_DIST_BURST_SIZE]
		__rte_cache_aligned; /* <= incoming from worker */

	int64_t pad2 __rte_cache_aligned;    /* <= one cache line */

	int count __rte_cache_aligned;       /* <= number of current mbufs */
};

/**
 * Internal state for the burst-mode distributor instance.
 */
struct rte_distributor {
	TAILQ_ENTRY(rte_distributor) next;    /**< Next in list. */

	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the ring. */
	unsigned int num_workers;             /**< Number of workers polling */
	unsigned int alg_type;                /**< Number of alg types */

	/**
	 * First cache line in this array are the tags inflight
	 * on the worker core. Second cache line are the backlog
	 * that are going to go to the worker core.
	 */
	uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2]
			__rte_cache_aligned;

	struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]
			__rte_cache_aligned;

	struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS];

	struct rte_distributor_returned_pkts returns;

	enum rte_distributor_match_function dist_match_fn;

	/* Fallback single-mode instance used when alg_type selects the
	 * legacy algorithm.
	 */
	struct rte_distributor_single *d_single;

	uint8_t active[RTE_DISTRIB_MAX_WORKERS];  /**< per-worker active flag */
	uint8_t activesum;                        /**< count of active workers */
};

/* Scalar implementation of the flow-ID matching step; writes per-packet
 * match results into output_ptr.
 */
void
find_match_scalar(struct rte_distributor *d,
		uint16_t *data_ptr,
		uint16_t *output_ptr);

/* Vectorized (SIMD) counterpart of find_match_scalar; same contract. */
void
find_match_vec(struct rte_distributor *d,
		uint16_t *data_ptr,
		uint16_t *output_ptr);

#endif /* _DIST_PRIV_H_ */