/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#ifndef _RTE_STACK_LF_GENERIC_H_
#define _RTE_STACK_LF_GENERIC_H_

#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_prefetch.h>

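/* Return the number of entries on the used list. As noted below, the value
 * can only under-report: push increments len after the chain is attached,
 * and pop decrements len before the chain is detached.
 */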
static __rte_always_inline unsigned int
__rte_stack_lf_count(struct rte_stack *s)
{
	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
	 * and stack_lf->len atomically, which can cause the list to appear
	 * shorter than it actually is if this function is called while other
	 * threads are modifying the list.
	 *
	 * However, given the inherently approximate nature of the get_count
	 * callback -- even if the list and its size were updated atomically,
	 * the size could change between when get_count executes and when the
	 * value is returned to the caller -- this is acceptable.
	 *
	 * The stack_lf->len updates are placed such that the list may appear to
	 * have fewer elements than it does, but will never appear to have more
	 * elements. If the mempool is near-empty to the point that this is a
	 * concern, the user should consider increasing the mempool size.
	 */
	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
			&s->stack_lf.used.len);
}

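/* Push a pre-linked chain of num elements, spanning first through last, onto
 * the lock-free list. The chain is attached by a 128-bit compare-and-swap on
 * the {top, cnt} head; cnt is a modification counter that is incremented on
 * every successful head update, so a stale top pointer cannot be mistaken
 * for the current one (ABA protection).
 */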
static __rte_always_inline void
__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
			  struct rte_stack_lf_elem *first,
			  struct rte_stack_lf_elem *last,
			  unsigned int num)
{
	struct rte_stack_lf_head old_head;
	int success;

	old_head = list->head;

	do {
		struct rte_stack_lf_head new_head;

		/* An acquire fence (or stronger) is needed for weak memory
		 * models to establish a synchronized-with relationship between
		 * the list->head load and store-release operations (as part of
		 * the rte_atomic128_cmp_exchange()).
		 */
		rte_smp_mb();

		/* Swing the top pointer to the first element in the list and
		 * make the last element point to the old top.
		 */
		new_head.top = first;
		new_head.cnt = old_head.cnt + 1;

		last->next = old_head.top;

		/* old_head is updated on failure */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, __ATOMIC_RELEASE,
				__ATOMIC_RELAXED);
	} while (success == 0);

	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
}

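/* Pop num elements from the lock-free list. Returns a pointer to the first
 * popped element (the old top) on success, or NULL if the list holds fewer
 * than num elements. The popped elements' data pointers are written to
 * obj_table (if non-NULL) and *last is set to the final element of the
 * detached chain (if last is non-NULL).
 */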
static __rte_always_inline struct rte_stack_lf_elem *
__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
			 unsigned int num,
			 void **obj_table,
			 struct rte_stack_lf_elem **last)
{
	struct rte_stack_lf_head old_head;
	int success = 0;

	/* Reserve num elements, if available */
	while (1) {
		uint64_t len = rte_atomic64_read((rte_atomic64_t *)&list->len);

		/* Does the list contain enough elements? */
		if (unlikely(len < num))
			return NULL;

		if (rte_atomic64_cmpset((volatile uint64_t *)&list->len,
					len, len - num))
			break;
	}

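	/* The length reservation above guarantees that num elements remain
	 * reachable from the head for this thread; the CAS loop below only
	 * retries when a concurrent push or pop moves the head during the
	 * traversal.
	 */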
	old_head = list->head;

	/* Pop num elements */
	do {
		struct rte_stack_lf_head new_head;
		struct rte_stack_lf_elem *tmp;
		unsigned int i;

		/* An acquire fence (or stronger) is needed for weak memory
		 * models to ensure the LF LIFO element reads are properly
		 * ordered with respect to the head pointer read.
		 */
		rte_smp_mb();

		rte_prefetch0(old_head.top);

		tmp = old_head.top;

		/* Traverse the list to find the new head. A next pointer will
		 * either point to another element or NULL; if a thread
		 * encounters a pointer that has already been popped, the CAS
		 * will fail.
		 */
		for (i = 0; i < num && tmp != NULL; i++) {
			rte_prefetch0(tmp->next);
			if (obj_table)
				obj_table[i] = tmp->data;
			if (last)
				*last = tmp;
			tmp = tmp->next;
		}

		/* If NULL was encountered, the list was modified while
		 * traversing it. Retry.
		 */
		if (i != num)
			continue;

		new_head.top = tmp;
		new_head.cnt = old_head.cnt + 1;

		/* old_head is updated on failure */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, __ATOMIC_RELEASE,
				__ATOMIC_RELAXED);
	} while (success == 0);

	return old_head.top;
}

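/*
 * Illustrative sketch (not part of this header): a caller-side push path,
 * assuming the rte_stack layout referenced above also provides a companion
 * stack_lf.free list of spare elements, as in the lock-free stack driver.
 * The names obj_table and n are hypothetical caller parameters. Free elements
 * are detached, loaded with the objects to store, then published on the used
 * list:
 *
 *	struct rte_stack_lf_elem *tmp, *first, *last = NULL;
 *	unsigned int i;
 *
 *	first = __rte_stack_lf_pop_elems(&s->stack_lf.free, n, NULL, &last);
 *	if (first == NULL)
 *		return 0;
 *
 *	for (tmp = first, i = 0; i < n; i++, tmp = tmp->next)
 *		tmp->data = obj_table[n - i - 1];
 *
 *	__rte_stack_lf_push_elems(&s->stack_lf.used, first, last, n);
 *	return n;
 */
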
#endif /* _RTE_STACK_LF_GENERIC_H_ */