/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#ifndef _RTE_STACK_LF_C11_H_
#define _RTE_STACK_LF_C11_H_

#include <rte_branch_prediction.h>
#include <rte_prefetch.h>

static __rte_always_inline unsigned int
__rte_stack_lf_count(struct rte_stack *s)
{
	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
	 * and stack_lf->len atomically, which can cause the list to appear
	 * shorter than it actually is if this function is called while other
	 * threads are modifying the list.
	 *
	 * However, given the inherently approximate nature of the get_count
	 * callback -- even if the list and its size were updated atomically,
	 * the size could change between when get_count executes and when the
	 * value is returned to the caller -- this is acceptable.
	 *
	 * The stack_lf->len updates are placed such that the list may appear to
	 * have fewer elements than it does, but will never appear to have more
	 * elements. If the mempool is near-empty to the point that this is a
	 * concern, the user should consider increasing the mempool size.
	 */
	return (unsigned int)__atomic_load_n(&s->stack_lf.used.len,
					     __ATOMIC_RELAXED);
}
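
/*
 * Illustrative only (not part of the DPDK API): a minimal sketch of how a
 * caller might use the approximate count above. The wrapper name
 * (example_can_likely_pop) is an assumption for this example. Because the
 * relaxed load can under-report the length and other threads may push or
 * pop concurrently, the result is only a hint, never a guarantee about a
 * subsequent pop.
 */
static __rte_always_inline int
example_can_likely_pop(struct rte_stack *s, unsigned int n)
{
	/* Treat the comparison as a hint, not a guarantee. */
	return __rte_stack_lf_count(s) >= n;
}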

static __rte_always_inline void
__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
			  struct rte_stack_lf_elem *first,
			  struct rte_stack_lf_elem *last,
			  unsigned int num)
{
	struct rte_stack_lf_head old_head;
	int success;

	old_head = list->head;

	do {
		struct rte_stack_lf_head new_head;

		/* Swing the top pointer to the first element in the list and
		 * make the last element point to the old top.
		 */
		new_head.top = first;
		new_head.cnt = old_head.cnt + 1;

		last->next = old_head.top;

		/* Use the release memmodel to ensure the writes to the LF LIFO
		 * elements are visible before the head pointer write.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				1, __ATOMIC_RELEASE,
				__ATOMIC_RELAXED);
	} while (success == 0);

	/* Ensure the stack modifications are not reordered with respect
	 * to the LIFO len update.
	 */
	__atomic_add_fetch(&list->len, num, __ATOMIC_RELEASE);
}
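
/*
 * Illustrative only (not part of the DPDK API): a minimal sketch of how a
 * caller might link a small array of elements into a chain before handing it
 * to __rte_stack_lf_push_elems(). The wrapper name (example_push_bulk) and
 * its parameters are assumptions for this example. The callee overwrites
 * last->next itself, so only the internal links and the data pointers need
 * to be set up here.
 */
static __rte_always_inline void
example_push_bulk(struct rte_stack_lf_list *list,
		  struct rte_stack_lf_elem *elems,
		  void * const *objs,
		  unsigned int n)
{
	unsigned int i;

	if (unlikely(n == 0))
		return;

	/* Record each object pointer and link elems[0..n-1] into a singly
	 * linked chain ending in NULL.
	 */
	for (i = 0; i < n; i++) {
		elems[i].data = objs[i];
		elems[i].next = (i + 1 < n) ? &elems[i + 1] : NULL;
	}

	/* first = &elems[0], last = &elems[n - 1]; the callee makes the last
	 * element point at the previous top of the list.
	 */
	__rte_stack_lf_push_elems(list, &elems[0], &elems[n - 1], n);
}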

static __rte_always_inline struct rte_stack_lf_elem *
__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
			 unsigned int num,
			 void **obj_table,
			 struct rte_stack_lf_elem **last)
{
	struct rte_stack_lf_head old_head;
	uint64_t len;
	int success;

	/* Reserve num elements, if available */
	len = __atomic_load_n(&list->len, __ATOMIC_RELAXED);

	while (1) {
		/* Does the list contain enough elements? */
		if (unlikely(len < num))
			return NULL;

		/* len is updated on failure */
		if (__atomic_compare_exchange_n(&list->len,
						&len, len - num,
						1, __ATOMIC_ACQUIRE,
						__ATOMIC_RELAXED))
			break;
	}

	/* If a torn read occurs, the CAS will fail and set old_head to the
	 * correct/latest value.
	 */
	old_head = list->head;

	/* Pop num elements */
	do {
		struct rte_stack_lf_head new_head;
		struct rte_stack_lf_elem *tmp;
		unsigned int i;

		/* Use the acquire memmodel to ensure the reads to the LF LIFO
		 * elements are properly ordered with respect to the head
		 * pointer read.
		 */
		__atomic_thread_fence(__ATOMIC_ACQUIRE);

		rte_prefetch0(old_head.top);

		tmp = old_head.top;

		/* Traverse the list to find the new head. A next pointer will
		 * either point to another element or NULL; if a thread
		 * encounters a pointer that has already been popped, the CAS
		 * will fail.
		 */
		for (i = 0; i < num && tmp != NULL; i++) {
			rte_prefetch0(tmp->next);
			if (obj_table)
				obj_table[i] = tmp->data;
			if (last)
				*last = tmp;
			tmp = tmp->next;
		}

		/* If NULL was encountered, the list was modified while
		 * traversing it. Retry.
		 */
		if (i != num) {
			old_head = list->head;
			continue;
		}

		new_head.top = tmp;
		new_head.cnt = old_head.cnt + 1;

		/*
		 * The CAS should have release semantics to ensure that
		 * items are read before the head is updated.
		 * But this function is internal, and items are read
		 * only when __rte_stack_lf_pop calls this function to
		 * pop items from the used list.
		 * Then, those items are pushed to the free list.
		 * Push uses a CAS store-release on head, which makes
		 * sure that items are read before they are pushed to
		 * the free list, without need for a CAS release here.
		 * This CAS could also be used to ensure that the new
		 * length is visible before the head update, but
		 * acquire semantics on the length update is enough.
		 */
		success = rte_atomic128_cmp_exchange(
				(rte_int128_t *)&list->head,
				(rte_int128_t *)&old_head,
				(rte_int128_t *)&new_head,
				0, __ATOMIC_RELAXED,
				__ATOMIC_RELAXED);
	} while (success == 0);

	return old_head.top;
}
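
/*
 * Illustrative only (not part of the DPDK API): a minimal sketch of the pop
 * path that the memory-ordering comment above refers to. Elements reserved
 * from a "used" list are immediately recycled onto a "free" list; the
 * store-release CAS inside __rte_stack_lf_push_elems() is what publishes the
 * element reads performed during the pop, which is why the pop's own CAS can
 * be relaxed. The wrapper name (example_pop_and_recycle) and the two-list
 * layout are assumptions for this example.
 */
static __rte_always_inline unsigned int
example_pop_and_recycle(struct rte_stack_lf_list *used,
			struct rte_stack_lf_list *free_list,
			void **obj_table,
			unsigned int n)
{
	struct rte_stack_lf_elem *first, *last = NULL;

	if (unlikely(n == 0))
		return 0;

	/* Reserve and unlink n elements; NULL means fewer than n elements
	 * were available and nothing was popped.
	 */
	first = __rte_stack_lf_pop_elems(used, n, obj_table, &last);
	if (unlikely(first == NULL))
		return 0;

	/* Return the now-unused elements to the free list. The release CAS
	 * in this push orders the reads done above before the elements can
	 * be reused by other threads.
	 */
	__rte_stack_lf_push_elems(free_list, first, last, n);

	return n;
}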

#endif /* _RTE_STACK_LF_C11_H_ */