1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
3 */
4
5 #ifndef __RTE_PIE_H_INCLUDED__
6 #define __RTE_PIE_H_INCLUDED__
7
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11
12 /**
13 * @file
14 * Proportional Integral controller Enhanced (PIE)
15 **/
16
17 #include <stdint.h>
18
19 #include <rte_random.h>
20 #include <rte_debug.h>
21 #include <rte_cycles.h>
22
23 #define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14)
24 * to start measurement cycle (bytes)
25 */
26 #define RTE_DQ_WEIGHT 0.25 /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
27 #define RTE_ALPHA 0.125 /**< Weights in drop probability calculations */
28 #define RTE_BETA 1.25 /**< Weights in drop probability calculations */
29 #define RTE_RAND_MAX ~0LLU /**< Max value of the random number */
30
31
32 /**
33 * PIE configuration parameters passed by user
34 *
35 */
36 struct rte_pie_params {
37 uint16_t qdelay_ref; /**< Latency Target (milliseconds) */
38 uint16_t dp_update_interval; /**< Update interval for drop probability (milliseconds) */
39 uint16_t max_burst; /**< Max Burst Allowance (milliseconds) */
40 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */
41 };
42
43 /**
44 * PIE configuration parameters
45 *
46 */
47 struct rte_pie_config {
48 uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */
49 uint64_t dp_update_interval; /**< Update interval for drop probability (in CPU cycles) */
50 uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) */
51 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */
52 };
53
54 /**
55 * PIE run-time data
56 */
57 struct rte_pie {
58 uint16_t active; /**< Flag for activating/deactivating pie */
59 uint16_t in_measurement; /**< Flag for activation of measurement cycle */
60 uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
61 uint64_t start_measurement; /**< Time to start to measurement cycle (in cpu cycles) */
62 uint64_t last_measurement; /**< Time of last measurement (in cpu cycles) */
63 uint64_t qlen; /**< Queue length (packets count) */
64 uint64_t qlen_bytes; /**< Queue length (bytes count) */
65 uint64_t avg_dq_time; /**< Time averaged dequeue rate (in cpu cycles) */
66 uint32_t burst_allowance; /**< Current burst allowance (bytes) */
67 uint64_t qdelay_old; /**< Old queue delay (bytes) */
68 double drop_prob; /**< Current packet drop probability */
69 double accu_prob; /**< Accumulated packet drop probability */
70 };
71
72 /**
73 * @brief Initialises run-time data
74 *
75 * @param pie [in,out] data pointer to PIE runtime data
76 *
77 * @return Operation status
78 * @retval 0 success
79 * @retval !0 error
80 */
81 int
82 __rte_experimental
83 rte_pie_rt_data_init(struct rte_pie *pie);
84
85 /**
86 * @brief Configures a single PIE configuration parameter structure.
87 *
88 * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
89 * @param qdelay_ref [in] latency target(milliseconds)
90 * @param dp_update_interval [in] update interval for drop probability (milliseconds)
91 * @param max_burst [in] maximum burst allowance (milliseconds)
92 * @param tailq_th [in] tail drop threshold for the queue (number of packets)
93 *
94 * @return Operation status
95 * @retval 0 success
96 * @retval !0 error
97 */
98 int
99 __rte_experimental
100 rte_pie_config_init(struct rte_pie_config *pie_cfg,
101 const uint16_t qdelay_ref,
102 const uint16_t dp_update_interval,
103 const uint16_t max_burst,
104 const uint16_t tailq_th);
105
106 /**
107 * @brief Decides packet enqueue when queue is empty
108 *
109 * Note: packet is never dropped in this particular case.
110 *
111 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
112 * @param pie [in, out] data pointer to PIE runtime data
113 * @param pkt_len [in] packet length in bytes
114 *
115 * @return Operation status
116 * @retval 0 enqueue the packet
117 * @retval !0 drop the packet
118 */
119 static int
120 __rte_experimental
rte_pie_enqueue_empty(const struct rte_pie_config * pie_cfg,struct rte_pie * pie,uint32_t pkt_len)121 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
122 struct rte_pie *pie,
123 uint32_t pkt_len)
124 {
125 RTE_ASSERT(pkt_len != 0);
126
127 /* Update the PIE qlen parameter */
128 pie->qlen++;
129 pie->qlen_bytes += pkt_len;
130
131 /**
132 * If the queue has been idle for a while, turn off PIE and Reset counters
133 */
134 if ((pie->active == 1) &&
135 (pie->qlen < (pie_cfg->tailq_th * 0.1))) {
136 pie->active = 0;
137 pie->in_measurement = 0;
138 }
139
140 return 0;
141 }
142
143 /**
144 * @brief make a decision to drop or enqueue a packet based on probability
145 * criteria
146 *
147 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
148 * @param pie [in, out] data pointer to PIE runtime data
149 * @param time [in] current time (measured in cpu cycles)
150 */
151 static void
152 __rte_experimental
_calc_drop_probability(const struct rte_pie_config * pie_cfg,struct rte_pie * pie,uint64_t time)153 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
154 struct rte_pie *pie, uint64_t time)
155 {
156 uint64_t qdelay_ref = pie_cfg->qdelay_ref;
157
158 /* Note: can be implemented using integer multiply.
159 * DQ_THRESHOLD is power of 2 value.
160 */
161 uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
162
163 double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
164 RTE_BETA * (current_qdelay - pie->qdelay_old);
165
166 if (pie->drop_prob < 0.000001)
167 p = p * 0.00048828125; /* (1/2048) = 0.00048828125 */
168 else if (pie->drop_prob < 0.00001)
169 p = p * 0.001953125; /* (1/512) = 0.001953125 */
170 else if (pie->drop_prob < 0.0001)
171 p = p * 0.0078125; /* (1/128) = 0.0078125 */
172 else if (pie->drop_prob < 0.001)
173 p = p * 0.03125; /* (1/32) = 0.03125 */
174 else if (pie->drop_prob < 0.01)
175 p = p * 0.125; /* (1/8) = 0.125 */
176 else if (pie->drop_prob < 0.1)
177 p = p * 0.5; /* (1/2) = 0.5 */
178
179 if (pie->drop_prob >= 0.1 && p > 0.02)
180 p = 0.02;
181
182 pie->drop_prob += p;
183
184 double qdelay = qdelay_ref * 0.5;
185
186 /* Exponentially decay drop prob when congestion goes away */
187 if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
188 pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */
189
190 /* Bound drop probability */
191 if (pie->drop_prob < 0)
192 pie->drop_prob = 0;
193 if (pie->drop_prob > 1)
194 pie->drop_prob = 1;
195
196 pie->qdelay_old = current_qdelay;
197 pie->last_measurement = time;
198
199 uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
200
201 pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
202 }
203
204 /**
205 * @brief make a decision to drop or enqueue a packet based on probability
206 * criteria
207 *
208 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
209 * @param pie [in, out] data pointer to PIE runtime data
210 *
211 * @return operation status
212 * @retval 0 enqueue the packet
213 * @retval 1 drop the packet
214 */
215 static inline int
216 __rte_experimental
_rte_pie_drop(const struct rte_pie_config * pie_cfg,struct rte_pie * pie)217 _rte_pie_drop(const struct rte_pie_config *pie_cfg,
218 struct rte_pie *pie)
219 {
220 uint64_t rand_value;
221 double qdelay = pie_cfg->qdelay_ref * 0.5;
222
223 /* PIE is active but the queue is not congested: return 0 */
224 if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
225 (pie->qlen <= (pie_cfg->tailq_th * 0.1)))
226 return 0;
227
228 if (pie->drop_prob == 0)
229 pie->accu_prob = 0;
230
231 /* For practical reasons, drop probability can be further scaled according
232 * to packet size, but one needs to set a bound to avoid unnecessary bias
233 * Random drop
234 */
235 pie->accu_prob += pie->drop_prob;
236
237 if (pie->accu_prob < 0.85)
238 return 0;
239
240 if (pie->accu_prob >= 8.5)
241 return 1;
242
243 rand_value = rte_rand()/RTE_RAND_MAX;
244
245 if ((double)rand_value < pie->drop_prob) {
246 pie->accu_prob = 0;
247 return 1;
248 }
249
250 /* No drop */
251 return 0;
252 }
253
254 /**
255 * @brief Decides if new packet should be enqueued or dropped for non-empty queue
256 *
257 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
258 * @param pie [in,out] data pointer to PIE runtime data
259 * @param pkt_len [in] packet length in bytes
260 * @param time [in] current time (measured in cpu cycles)
261 *
262 * @return Operation status
263 * @retval 0 enqueue the packet
264 * @retval 1 drop the packet based on max threshold criterion
265 * @retval 2 drop the packet based on mark probability criterion
266 */
267 static inline int
268 __rte_experimental
rte_pie_enqueue_nonempty(const struct rte_pie_config * pie_cfg,struct rte_pie * pie,uint32_t pkt_len,const uint64_t time)269 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
270 struct rte_pie *pie,
271 uint32_t pkt_len,
272 const uint64_t time)
273 {
274 /* Check queue space against the tail drop threshold */
275 if (pie->qlen >= pie_cfg->tailq_th) {
276
277 pie->accu_prob = 0;
278 return 1;
279 }
280
281 if (pie->active) {
282 /* Update drop probability after certain interval */
283 if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
284 _calc_drop_probability(pie_cfg, pie, time);
285
286 /* Decide whether packet to be dropped or enqueued */
287 if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
288 return 2;
289 }
290
291 /* When queue occupancy is over a certain threshold, turn on PIE */
292 if ((pie->active == 0) &&
293 (pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
294 pie->active = 1;
295 pie->qdelay_old = 0;
296 pie->drop_prob = 0;
297 pie->in_measurement = 1;
298 pie->departed_bytes_count = 0;
299 pie->avg_dq_time = 0;
300 pie->last_measurement = time;
301 pie->burst_allowance = pie_cfg->max_burst;
302 pie->accu_prob = 0;
303 pie->start_measurement = time;
304 }
305
306 /* when queue has been idle for a while, turn off PIE and Reset counters */
307 if (pie->active == 1 &&
308 pie->qlen < (pie_cfg->tailq_th * 0.1)) {
309 pie->active = 0;
310 pie->in_measurement = 0;
311 }
312
313 /* Update PIE qlen parameter */
314 pie->qlen++;
315 pie->qlen_bytes += pkt_len;
316
317 /* No drop */
318 return 0;
319 }
320
321 /**
322 * @brief Decides if new packet should be enqueued or dropped
323 * Updates run time data and gives verdict whether to enqueue or drop the packet.
324 *
325 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
326 * @param pie [in,out] data pointer to PIE runtime data
327 * @param qlen [in] queue length
328 * @param pkt_len [in] packet length in bytes
329 * @param time [in] current time stamp (measured in cpu cycles)
330 *
331 * @return Operation status
332 * @retval 0 enqueue the packet
333 * @retval 1 drop the packet based on drop probability criteria
334 */
335 static inline int
336 __rte_experimental
rte_pie_enqueue(const struct rte_pie_config * pie_cfg,struct rte_pie * pie,const unsigned int qlen,uint32_t pkt_len,const uint64_t time)337 rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
338 struct rte_pie *pie,
339 const unsigned int qlen,
340 uint32_t pkt_len,
341 const uint64_t time)
342 {
343 RTE_ASSERT(pie_cfg != NULL);
344 RTE_ASSERT(pie != NULL);
345
346 if (qlen != 0)
347 return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
348 else
349 return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
350 }
351
352 /**
353 * @brief PIE rate estimation method
354 * Called on each packet departure.
355 *
356 * @param pie [in] data pointer to PIE runtime data
357 * @param pkt_len [in] packet length in bytes
358 * @param time [in] current time stamp in cpu cycles
359 */
360 static inline void
361 __rte_experimental
rte_pie_dequeue(struct rte_pie * pie,uint32_t pkt_len,uint64_t time)362 rte_pie_dequeue(struct rte_pie *pie,
363 uint32_t pkt_len,
364 uint64_t time)
365 {
366 /* Dequeue rate estimation */
367 if (pie->in_measurement) {
368 pie->departed_bytes_count += pkt_len;
369
370 /* Start a new measurement cycle when enough packets */
371 if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
372 uint64_t dq_time = time - pie->start_measurement;
373
374 if (pie->avg_dq_time == 0)
375 pie->avg_dq_time = dq_time;
376 else
377 pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
378 * (1 - RTE_DQ_WEIGHT);
379
380 pie->in_measurement = 0;
381 }
382 }
383
384 /* Start measurement cycle when enough data in the queue */
385 if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
386 pie->in_measurement = 1;
387 pie->start_measurement = time;
388 pie->departed_bytes_count = 0;
389 }
390 }
391
392 #ifdef __cplusplus
393 }
394 #endif
395
396 #endif /* __RTE_PIE_H_INCLUDED__ */
397