/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Ericsson AB
 */

#ifndef _DSW_EVDEV_H_
#define _DSW_EVDEV_H_

#include <eventdev_pmd.h>

#include <rte_event_ring.h>
#include <rte_eventdev.h>

#define DSW_PMD_NAME RTE_STR(event_dsw)

#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

#define DSW_MAX_QUEUES (16)

#define DSW_MAX_EVENTS (16384)

/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * the flow ids of several large flows hash to the same DSW-level
 * flow. Such collisions limit parallelism, and thus the number of
 * cores that can be utilized. However, configuring a large number of
 * DSW flows might, depending on traffic and the actual application
 * flow id value range, result in each such DSW-level flow carrying
 * very little load. Migrating such a flow has little effect, in
 * terms of the amount of processing load redistributed, which in
 * turn reduces the load balancing speed, since the flow migration
 * rate has an upper limit. Code changes are required to allow
 * > 32k DSW-level flows.
 */
#define DSW_MAX_FLOWS_BITS (13)
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
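
/* A minimal sketch (not part of this header's actual API) of how a
 * 24-bit eventdev flow id could be folded down to a DSW-level flow
 * hash; the hash function the PMD actually uses may differ.
 * XOR-folding DSW_MAX_FLOWS_BITS-sized chunks consumes all input
 * bits while keeping the result within DSW_MAX_FLOWS_MASK.
 */
static inline uint16_t
dsw_flow_id_hash_sketch(uint32_t flow_id)
{
	uint16_t hash = 0;
	uint16_t offset = 0;

	do {
		/* Fold the next DSW_MAX_FLOWS_BITS-wide chunk into
		 * the accumulated hash.
		 */
		hash ^= (flow_id >> offset) & DSW_MAX_FLOWS_MASK;
		offset += DSW_MAX_FLOWS_BITS;
	} while (offset < 24);

	return hash;
}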

/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of these parallel "flows" away
 * from a particular port, the likelihood of events being scheduled to
 * that port is reduced, and thus a kind of statistical load balancing
 * is achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)

/* 'Background tasks' are polling the control rings for
 * migration-related messages, and flushing the output buffer (so
 * that buffered events don't linger too long). This value shouldn't
 * be too low, since then the system won't benefit from the 'batching'
 * effects of the output buffer, and it shouldn't be too high, since
 * that would make buffered events linger too long in case the port
 * goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)

/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 */
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)

/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination port's
 * input ring.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)

#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
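
/* Illustrative arithmetic (not from the original header): with
 * DSW_MAX_LOAD = INT16_MAX = 32767, DSW_LOAD_FROM_PERCENT(70) =
 * (70*32767)/100 = 22936, and DSW_LOAD_TO_PERCENT(22936) =
 * (100*22936)/32767 = 69. Integer division truncates, so a
 * round-trip may lose a percentage point.
 */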

/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, migrated too quickly to a lightly loaded port - in
 * particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)

/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent processing an event
 * burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))

#define DSW_MAX_EVENTS_RECORDED (128)

#define DSW_MAX_FLOWS_PER_MIGRATION (8)

/* Only one outstanding migration per port is allowed. */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)

/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)

/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and also so that such
 * bursts of different flow ids, but on the same queue, come
 * consecutively. All this in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */

/* #define DSW_SORT_DEQUEUED */

struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};

enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_FORWARDING,
	DSW_MIGRATION_STATE_UNPAUSING
};

struct dsw_port {
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	int32_t inflight_credits;

	int32_t new_event_threshold;

	uint16_t pending_releases;

	uint16_t next_parallel_flow_id;

	uint16_t ops_since_bg_task;

	/* Timestamp of the most recent 'background' processing. */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_emigration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t emigration_start;
	uint64_t emigrations;
	uint64_t emigration_latency;

	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
	struct dsw_queue_flow
		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
	uint8_t emigration_targets_len;
	uint8_t cfm_cnt;

	uint64_t immigrations;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case, all in-flight events can be
	 * lying around paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];

	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	uint64_t enqueue_calls;
	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	uint64_t dequeue_calls;
	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read up from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	struct rte_event_ring *in_ring __rte_cache_aligned;

	struct rte_ring *ctl_in_ring __rte_cache_aligned;

	/* Estimate of current port load. */
	int16_t load __rte_cache_aligned;
	/* Estimate of flows currently migrating to this port. */
	int32_t immigration_load __rte_cache_aligned;
} __rte_cache_aligned;

struct dsw_queue {
	uint8_t schedule_type;
	uint8_t serving_ports[DSW_MAX_PORTS];
	uint16_t num_serving_ports;

	uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
};

struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	int32_t max_inflight;

	int32_t credits_on_loan __rte_cache_aligned;
};

#define DSW_CTL_PAUS_REQ (0)
#define DSW_CTL_UNPAUS_REQ (1)
#define DSW_CTL_CFM (2)

struct dsw_ctl_msg {
	uint8_t type;
	uint8_t originating_port_id;
	uint8_t qfs_len;
	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
} __rte_aligned(4);
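
/* A minimal sketch of how a port could borrow credits from the
 * central in-flight credit pool, assuming relaxed atomics on
 * credits_on_loan. The PMD's actual enqueue path lives in its .c
 * files and may differ; this only illustrates why DSW_MIN_CREDIT_LOAN
 * exists: borrowing at least that many credits per loan amortizes
 * the cost of the shared counter over many events.
 */
static inline bool
dsw_port_acquire_credits_sketch(struct dsw_evdev *dsw,
				struct dsw_port *port, int32_t credits)
{
	int32_t missing = credits - port->inflight_credits;
	int32_t acquired;

	if (missing <= 0)
		return true;

	/* Borrow in large chunks, never less than the minimum loan. */
	acquired = RTE_MAX(missing, DSW_MIN_CREDIT_LOAN);

	if (__atomic_add_fetch(&dsw->credits_on_loan, acquired,
			       __ATOMIC_RELAXED) > dsw->max_inflight) {
		/* Another port took the last credits; back out. */
		__atomic_sub_fetch(&dsw->credits_on_loan, acquired,
				   __ATOMIC_RELAXED);
		return false;
	}

	port->inflight_credits += acquired;
	return true;
}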

uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
				 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
				     const struct rte_event events[],
				     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
					 const struct rte_event events[],
					 uint16_t events_len);

uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
void dsw_event_maintain(void *port, int op);

int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 unsigned int *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const unsigned int ids[], uint64_t values[],
		   unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, unsigned int *id);

static inline struct dsw_evdev *
dsw_pmd_priv(const struct rte_eventdev *eventdev)
{
	return eventdev->data->dev_private;
}

#define DSW_LOG_DP(level, fmt, args...)				\
	RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt,	\
		   DSW_PMD_NAME,				\
		   __func__, __LINE__, ## args)

#define DSW_LOG_DP_PORT(level, port_id, fmt, args...)		\
	DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)

#endif