1 /*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15 /*
16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
17 */
18
19 #include <sys/zfs_context.h>
20 #include <sys/multilist.h>
21 #include <sys/trace_zfs.h>
22
23 /* needed for spa_get_random() */
24 #include <sys/spa.h>
25
26 /*
27 * This overrides the number of sublists in each multilist_t, which defaults
28 * to the number of CPUs in the system (see multilist_create()).
29 */
30 int zfs_multilist_num_sublists = 0;
31
32 /*
33 * Given the object contained on the list, return a pointer to the
34 * object's multilist_node_t structure it contains.
35 */
36 #ifdef ZFS_DEBUG
37 static multilist_node_t *
multilist_d2l(multilist_t * ml,void * obj)38 multilist_d2l(multilist_t *ml, void *obj)
39 {
40 return ((multilist_node_t *)((char *)obj + ml->ml_offset));
41 }
42 #endif
43
44 /*
45 * Initialize a new mutlilist using the parameters specified.
46 *
47 * - 'size' denotes the size of the structure containing the
48 * multilist_node_t.
49 * - 'offset' denotes the byte offset of the mutlilist_node_t within
50 * the structure that contains it.
51 * - 'num' specifies the number of internal sublists to create.
52 * - 'index_func' is used to determine which sublist to insert into
53 * when the multilist_insert() function is called; as well as which
54 * sublist to remove from when multilist_remove() is called. The
55 * requirements this function must meet, are the following:
56 *
57 * - It must always return the same value when called on the same
58 * object (to ensure the object is removed from the list it was
59 * inserted into).
60 *
61 * - It must return a value in the range [0, number of sublists).
62 * The multilist_get_num_sublists() function may be used to
63 * determine the number of sublists in the multilist.
64 *
65 * Also, in order to reduce internal contention between the sublists
66 * during insertion and removal, this function should choose evenly
67 * between all available sublists when inserting. This isn't a hard
68 * requirement, but a general rule of thumb in order to garner the
69 * best multi-threaded performance out of the data structure.
70 */
71 static multilist_t *
multilist_create_impl(size_t size,size_t offset,unsigned int num,multilist_sublist_index_func_t * index_func)72 multilist_create_impl(size_t size, size_t offset,
73 unsigned int num, multilist_sublist_index_func_t *index_func)
74 {
75 ASSERT3U(size, >, 0);
76 ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
77 ASSERT3U(num, >, 0);
78 ASSERT3P(index_func, !=, NULL);
79
80 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
81 ml->ml_offset = offset;
82 ml->ml_num_sublists = num;
83 ml->ml_index_func = index_func;
84
85 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
86 ml->ml_num_sublists, KM_SLEEP);
87
88 ASSERT3P(ml->ml_sublists, !=, NULL);
89
90 for (int i = 0; i < ml->ml_num_sublists; i++) {
91 multilist_sublist_t *mls = &ml->ml_sublists[i];
92 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
93 list_create(&mls->mls_list, size, offset);
94 }
95 return (ml);
96 }
97
98 /*
99 * Allocate a new multilist, using the default number of sublists (the number
100 * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
101 * that the multilists do not expand if more CPUs are hot-added. In that case,
102 * we will have less fanout than boot_ncpus, but we don't want to always
103 * reserve the RAM necessary to create the extra slots for additional CPUs up
104 * front, and dynamically adding them is a complex task.
105 */
106 multilist_t *
multilist_create(size_t size,size_t offset,multilist_sublist_index_func_t * index_func)107 multilist_create(size_t size, size_t offset,
108 multilist_sublist_index_func_t *index_func)
109 {
110 int num_sublists;
111
112 if (zfs_multilist_num_sublists > 0) {
113 num_sublists = zfs_multilist_num_sublists;
114 } else {
115 num_sublists = MAX(boot_ncpus, 4);
116 }
117
118 return (multilist_create_impl(size, offset, num_sublists, index_func));
119 }
120
121 /*
122 * Destroy the given multilist object, and free up any memory it holds.
123 */
124 void
multilist_destroy(multilist_t * ml)125 multilist_destroy(multilist_t *ml)
126 {
127 ASSERT(multilist_is_empty(ml));
128
129 for (int i = 0; i < ml->ml_num_sublists; i++) {
130 multilist_sublist_t *mls = &ml->ml_sublists[i];
131
132 ASSERT(list_is_empty(&mls->mls_list));
133
134 list_destroy(&mls->mls_list);
135 mutex_destroy(&mls->mls_lock);
136 }
137
138 ASSERT3P(ml->ml_sublists, !=, NULL);
139 kmem_free(ml->ml_sublists,
140 sizeof (multilist_sublist_t) * ml->ml_num_sublists);
141
142 ml->ml_num_sublists = 0;
143 ml->ml_offset = 0;
144 kmem_free(ml, sizeof (multilist_t));
145 }
146
147 /*
148 * Insert the given object into the multilist.
149 *
150 * This function will insert the object specified into the sublist
151 * determined using the function given at multilist creation time.
152 *
153 * The sublist locks are automatically acquired if not already held, to
154 * ensure consistency when inserting and removing from multiple threads.
155 */
156 void
multilist_insert(multilist_t * ml,void * obj)157 multilist_insert(multilist_t *ml, void *obj)
158 {
159 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
160 multilist_sublist_t *mls;
161 boolean_t need_lock;
162
163 DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
164 unsigned int, sublist_idx, void *, obj);
165
166 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
167
168 mls = &ml->ml_sublists[sublist_idx];
169
170 /*
171 * Note: Callers may already hold the sublist lock by calling
172 * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
173 * returning TRUE if and only if the current thread holds the
174 * lock. While it's a little ugly to make the lock recursive in
175 * this way, it works and allows the calling code to be much
176 * simpler -- otherwise it would have to pass around a flag
177 * indicating that it already has the lock.
178 */
179 need_lock = !MUTEX_HELD(&mls->mls_lock);
180
181 if (need_lock)
182 mutex_enter(&mls->mls_lock);
183
184 ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
185
186 multilist_sublist_insert_head(mls, obj);
187
188 if (need_lock)
189 mutex_exit(&mls->mls_lock);
190 }
191
192 /*
193 * Remove the given object from the multilist.
194 *
195 * This function will remove the object specified from the sublist
196 * determined using the function given at multilist creation time.
197 *
198 * The necessary sublist locks are automatically acquired, to ensure
199 * consistency when inserting and removing from multiple threads.
200 */
201 void
multilist_remove(multilist_t * ml,void * obj)202 multilist_remove(multilist_t *ml, void *obj)
203 {
204 unsigned int sublist_idx = ml->ml_index_func(ml, obj);
205 multilist_sublist_t *mls;
206 boolean_t need_lock;
207
208 DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
209 unsigned int, sublist_idx, void *, obj);
210
211 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
212
213 mls = &ml->ml_sublists[sublist_idx];
214 /* See comment in multilist_insert(). */
215 need_lock = !MUTEX_HELD(&mls->mls_lock);
216
217 if (need_lock)
218 mutex_enter(&mls->mls_lock);
219
220 ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
221
222 multilist_sublist_remove(mls, obj);
223
224 if (need_lock)
225 mutex_exit(&mls->mls_lock);
226 }
227
228 /*
229 * Check to see if this multilist object is empty.
230 *
231 * This will return TRUE if it finds all of the sublists of this
232 * multilist to be empty, and FALSE otherwise. Each sublist lock will be
233 * automatically acquired as necessary.
234 *
235 * If concurrent insertions and removals are occurring, the semantics
236 * of this function become a little fuzzy. Instead of locking all
237 * sublists for the entire call time of the function, each sublist is
238 * only locked as it is individually checked for emptiness. Thus, it's
239 * possible for this function to return TRUE with non-empty sublists at
240 * the time the function returns. This would be due to another thread
241 * inserting into a given sublist, after that specific sublist was check
242 * and deemed empty, but before all sublists have been checked.
243 */
244 int
multilist_is_empty(multilist_t * ml)245 multilist_is_empty(multilist_t *ml)
246 {
247 for (int i = 0; i < ml->ml_num_sublists; i++) {
248 multilist_sublist_t *mls = &ml->ml_sublists[i];
249 /* See comment in multilist_insert(). */
250 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
251
252 if (need_lock)
253 mutex_enter(&mls->mls_lock);
254
255 if (!list_is_empty(&mls->mls_list)) {
256 if (need_lock)
257 mutex_exit(&mls->mls_lock);
258
259 return (FALSE);
260 }
261
262 if (need_lock)
263 mutex_exit(&mls->mls_lock);
264 }
265
266 return (TRUE);
267 }
268
269 /* Return the number of sublists composing this multilist */
270 unsigned int
multilist_get_num_sublists(multilist_t * ml)271 multilist_get_num_sublists(multilist_t *ml)
272 {
273 return (ml->ml_num_sublists);
274 }
275
276 /* Return a randomly selected, valid sublist index for this multilist */
277 unsigned int
multilist_get_random_index(multilist_t * ml)278 multilist_get_random_index(multilist_t *ml)
279 {
280 return (spa_get_random(ml->ml_num_sublists));
281 }
282
283 /* Lock and return the sublist specified at the given index */
284 multilist_sublist_t *
multilist_sublist_lock(multilist_t * ml,unsigned int sublist_idx)285 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
286 {
287 multilist_sublist_t *mls;
288
289 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
290 mls = &ml->ml_sublists[sublist_idx];
291 mutex_enter(&mls->mls_lock);
292
293 return (mls);
294 }
295
296 /* Lock and return the sublist that would be used to store the specified obj */
297 multilist_sublist_t *
multilist_sublist_lock_obj(multilist_t * ml,void * obj)298 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
299 {
300 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
301 }
302
303 void
multilist_sublist_unlock(multilist_sublist_t * mls)304 multilist_sublist_unlock(multilist_sublist_t *mls)
305 {
306 mutex_exit(&mls->mls_lock);
307 }
308
309 /*
310 * We're allowing any object to be inserted into this specific sublist,
311 * but this can lead to trouble if multilist_remove() is called to
312 * remove this object. Specifically, if calling ml_index_func on this
313 * object returns an index for sublist different than what is passed as
314 * a parameter here, any call to multilist_remove() with this newly
315 * inserted object is undefined! (the call to multilist_remove() will
316 * remove the object from a list that it isn't contained in)
317 */
318 void
multilist_sublist_insert_head(multilist_sublist_t * mls,void * obj)319 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
320 {
321 ASSERT(MUTEX_HELD(&mls->mls_lock));
322 list_insert_head(&mls->mls_list, obj);
323 }
324
325 /* please see comment above multilist_sublist_insert_head */
326 void
multilist_sublist_insert_tail(multilist_sublist_t * mls,void * obj)327 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
328 {
329 ASSERT(MUTEX_HELD(&mls->mls_lock));
330 list_insert_tail(&mls->mls_list, obj);
331 }
332
333 /*
334 * Move the object one element forward in the list.
335 *
336 * This function will move the given object forward in the list (towards
337 * the head) by one object. So, in essence, it will swap its position in
338 * the list with its "prev" pointer. If the given object is already at the
339 * head of the list, it cannot be moved forward any more than it already
340 * is, so no action is taken.
341 *
342 * NOTE: This function **must not** remove any object from the list other
343 * than the object given as the parameter. This is relied upon in
344 * arc_evict_state_impl().
345 */
346 void
multilist_sublist_move_forward(multilist_sublist_t * mls,void * obj)347 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
348 {
349 void *prev = list_prev(&mls->mls_list, obj);
350
351 ASSERT(MUTEX_HELD(&mls->mls_lock));
352 ASSERT(!list_is_empty(&mls->mls_list));
353
354 /* 'obj' must be at the head of the list, nothing to do */
355 if (prev == NULL)
356 return;
357
358 list_remove(&mls->mls_list, obj);
359 list_insert_before(&mls->mls_list, prev, obj);
360 }
361
362 void
multilist_sublist_remove(multilist_sublist_t * mls,void * obj)363 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
364 {
365 ASSERT(MUTEX_HELD(&mls->mls_lock));
366 list_remove(&mls->mls_list, obj);
367 }
368
369 int
multilist_sublist_is_empty(multilist_sublist_t * mls)370 multilist_sublist_is_empty(multilist_sublist_t *mls)
371 {
372 ASSERT(MUTEX_HELD(&mls->mls_lock));
373 return (list_is_empty(&mls->mls_list));
374 }
375
376 int
multilist_sublist_is_empty_idx(multilist_t * ml,unsigned int sublist_idx)377 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
378 {
379 multilist_sublist_t *mls;
380 int empty;
381
382 ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
383 mls = &ml->ml_sublists[sublist_idx];
384 ASSERT(!MUTEX_HELD(&mls->mls_lock));
385 mutex_enter(&mls->mls_lock);
386 empty = list_is_empty(&mls->mls_list);
387 mutex_exit(&mls->mls_lock);
388 return (empty);
389 }
390
391 void *
multilist_sublist_head(multilist_sublist_t * mls)392 multilist_sublist_head(multilist_sublist_t *mls)
393 {
394 ASSERT(MUTEX_HELD(&mls->mls_lock));
395 return (list_head(&mls->mls_list));
396 }
397
398 void *
multilist_sublist_tail(multilist_sublist_t * mls)399 multilist_sublist_tail(multilist_sublist_t *mls)
400 {
401 ASSERT(MUTEX_HELD(&mls->mls_lock));
402 return (list_tail(&mls->mls_list));
403 }
404
405 void *
multilist_sublist_next(multilist_sublist_t * mls,void * obj)406 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
407 {
408 ASSERT(MUTEX_HELD(&mls->mls_lock));
409 return (list_next(&mls->mls_list, obj));
410 }
411
412 void *
multilist_sublist_prev(multilist_sublist_t * mls,void * obj)413 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
414 {
415 ASSERT(MUTEX_HELD(&mls->mls_lock));
416 return (list_prev(&mls->mls_list, obj));
417 }
418
419 void
multilist_link_init(multilist_node_t * link)420 multilist_link_init(multilist_node_t *link)
421 {
422 list_link_init(link);
423 }
424
425 int
multilist_link_active(multilist_node_t * link)426 multilist_link_active(multilist_node_t *link)
427 {
428 return (list_link_active(link));
429 }
430
431 /* BEGIN CSTYLED */
432 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, INT, ZMOD_RW,
433 "Number of sublists used in each multilist");
434 /* END CSTYLED */
435