xref: /freebsd-12.1/contrib/ntp/lib/isc/rwlock.c (revision 2b15cb3d)
1 /*
2  * Copyright (C) 2004, 2005, 2007, 2009, 2011, 2012  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1998-2001, 2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 /* $Id$ */
19 
20 /*! \file */
21 
22 #include <config.h>
23 
24 #include <stddef.h>
25 
26 #include <isc/atomic.h>
27 #include <isc/magic.h>
28 #include <isc/msgs.h>
29 #include <isc/platform.h>
30 #include <isc/rwlock.h>
31 #include <isc/util.h>
32 
33 #define RWLOCK_MAGIC		ISC_MAGIC('R', 'W', 'L', 'k')
34 #define VALID_RWLOCK(rwl)	ISC_MAGIC_VALID(rwl, RWLOCK_MAGIC)
35 
36 #ifdef ISC_PLATFORM_USETHREADS
37 
38 #ifndef RWLOCK_DEFAULT_READ_QUOTA
39 #define RWLOCK_DEFAULT_READ_QUOTA 4
40 #endif
41 
42 #ifndef RWLOCK_DEFAULT_WRITE_QUOTA
43 #define RWLOCK_DEFAULT_WRITE_QUOTA 4
44 #endif
45 
46 #ifdef ISC_RWLOCK_TRACE
47 #include <stdio.h>		/* Required for fprintf/stderr. */
48 #include <isc/thread.h>		/* Required for isc_thread_self(). */
49 
static void
print_lock(const char *operation, isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	/*
	 * Trace helper (compiled only under ISC_RWLOCK_TRACE): dump the
	 * rwlock's state to stderr, fetching localized message text from
	 * the ISC message catalog with English fallbacks.
	 *
	 * NOTE(review): this reads rwl->type/active/granted, fields used
	 * by the generic (non-atomic) implementation below — confirm they
	 * also exist in the atomic build before enabling tracing there.
	 */
	fprintf(stderr,
		isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
			       ISC_MSG_PRINTLOCK,
			       "rwlock %p thread %lu %s(%s): %s, %u active, "
			       "%u granted, %u rwaiting, %u wwaiting\n"),
		rwl, isc_thread_self(), operation,
		(type == isc_rwlocktype_read ?
		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				ISC_MSG_READ, "read") :
		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				ISC_MSG_WRITE, "write")),
		(rwl->type == isc_rwlocktype_read ?
		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				ISC_MSG_READING, "reading") :
		 isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				ISC_MSG_WRITING, "writing")),
		rwl->active, rwl->granted, rwl->readers_waiting,
		rwl->writers_waiting);
}
71 #endif
72 
/*
 * Initialize 'rwl', its mutex, and its two condition variables.
 * A quota argument of 0 selects the compiled-in default.  On failure,
 * primitives already initialized are torn down in reverse order and
 * the magic number is left cleared so the lock is not VALID_RWLOCK().
 */
isc_result_t
isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
		unsigned int write_quota)
{
	isc_result_t result;

	REQUIRE(rwl != NULL);

	/*
	 * In case there's trouble initializing, we zero magic now.  If all
	 * goes well, we'll set it to RWLOCK_MAGIC.
	 */
	rwl->magic = 0;

#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
	/* Atomic implementation: see the algorithm notes further below. */
	rwl->write_requests = 0;
	rwl->write_completions = 0;
	rwl->cnt_and_flag = 0;
	rwl->readers_waiting = 0;
	rwl->write_granted = 0;
	if (read_quota != 0) {
		/* The atomic variant honors only the write quota. */
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "read quota is not supported");
	}
	if (write_quota == 0)
		write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
	rwl->write_quota = write_quota;
#else
	rwl->type = isc_rwlocktype_read;
	rwl->original = isc_rwlocktype_none;
	rwl->active = 0;
	rwl->granted = 0;
	rwl->readers_waiting = 0;
	rwl->writers_waiting = 0;
	if (read_quota == 0)
		read_quota = RWLOCK_DEFAULT_READ_QUOTA;
	rwl->read_quota = read_quota;
	if (write_quota == 0)
		write_quota = RWLOCK_DEFAULT_WRITE_QUOTA;
	rwl->write_quota = write_quota;
#endif

	result = isc_mutex_init(&rwl->lock);
	if (result != ISC_R_SUCCESS)
		return (result);

	result = isc_condition_init(&rwl->readable);
	if (result != ISC_R_SUCCESS) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_condition_init(readable) %s: %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"),
				 isc_result_totext(result));
		result = ISC_R_UNEXPECTED;
		goto destroy_lock;
	}
	result = isc_condition_init(&rwl->writeable);
	if (result != ISC_R_SUCCESS) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_condition_init(writeable) %s: %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"),
				 isc_result_totext(result));
		result = ISC_R_UNEXPECTED;
		goto destroy_rcond;
	}

	rwl->magic = RWLOCK_MAGIC;

	return (ISC_R_SUCCESS);

	/* Error unwind: release in reverse order of initialization. */
  destroy_rcond:
	(void)isc_condition_destroy(&rwl->readable);
  destroy_lock:
	DESTROYLOCK(&rwl->lock);

	return (result);
}
151 
/*
 * Destroy 'rwl'.  The lock must be completely idle: no holders and no
 * threads waiting.  Clearing the magic invalidates any further use.
 */
void
isc_rwlock_destroy(isc_rwlock_t *rwl) {
	REQUIRE(VALID_RWLOCK(rwl));

#if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
	/* Every queued writer retired, no readers active or waiting. */
	REQUIRE(rwl->write_requests == rwl->write_completions &&
		rwl->cnt_and_flag == 0 && rwl->readers_waiting == 0);
#else
	LOCK(&rwl->lock);
	REQUIRE(rwl->active == 0 &&
		rwl->readers_waiting == 0 &&
		rwl->writers_waiting == 0);
	UNLOCK(&rwl->lock);
#endif

	rwl->magic = 0;
	(void)isc_condition_destroy(&rwl->readable);
	(void)isc_condition_destroy(&rwl->writeable);
	DESTROYLOCK(&rwl->lock);
}
172 
173 #if defined(ISC_PLATFORM_HAVEXADD) && defined(ISC_PLATFORM_HAVECMPXCHG)
174 
175 /*
176  * When some architecture-dependent atomic operations are available,
177  * rwlock can be more efficient than the generic algorithm defined below.
178  * The basic algorithm is described in the following URL:
179  *   http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html
180  *
181  * The key is to use the following integer variables modified atomically:
182  *   write_requests, write_completions, and cnt_and_flag.
183  *
184  * write_requests and write_completions act as a waiting queue for writers
185  * in order to ensure the FIFO order.  Both variables begin with the initial
186  * value of 0.  When a new writer tries to get a write lock, it increments
187  * write_requests and gets the previous value of the variable as a "ticket".
188  * When write_completions reaches the ticket number, the new writer can start
189  * writing.  When the writer completes its work, it increments
190  * write_completions so that another new writer can start working.  If the
191  * write_requests is not equal to write_completions, it means a writer is now
 * working or waiting.  In this case, new readers cannot start reading, or
193  * in other words, this algorithm basically prefers writers.
194  *
195  * cnt_and_flag is a "lock" shared by all readers and writers.  This integer
196  * variable is a kind of structure with two members: writer_flag (1 bit) and
197  * reader_count (31 bits).  The writer_flag shows whether a writer is working,
198  * and the reader_count shows the number of readers currently working or almost
199  * ready for working.  A writer who has the current "ticket" tries to get the
200  * lock by exclusively setting the writer_flag to 1, provided that the whole
201  * 32-bit is 0 (meaning no readers or writers working).  On the other hand,
202  * a new reader tries to increment the "reader_count" field provided that
203  * the writer_flag is 0 (meaning there is no writer working).
204  *
205  * If some of the above operations fail, the reader or the writer sleeps
206  * until the related condition changes.  When a working reader or writer
207  * completes its work, some readers or writers are sleeping, and the condition
208  * that suspended the reader or writer has changed, it wakes up the sleeping
209  * readers or writers.
210  *
211  * As already noted, this algorithm basically prefers writers.  In order to
212  * prevent readers from starving, however, the algorithm also introduces the
213  * "writer quota" (Q).  When Q consecutive writers have completed their work,
214  * suspending readers, the last writer will wake up the readers, even if a new
215  * writer is waiting.
216  *
217  * Implementation specific note: due to the combination of atomic operations
218  * and a mutex lock, ordering between the atomic operation and locks can be
219  * very sensitive in some cases.  In particular, it is generally very important
220  * to check the atomic variable that requires a reader or writer to sleep after
221  * locking the mutex and before actually sleeping; otherwise, it could be very
222  * likely to cause a deadlock.  For example, assume "var" is a variable
223  * atomically modified, then the corresponding code would be:
224  *	if (var == need_sleep) {
225  *		LOCK(lock);
226  *		if (var == need_sleep)
227  *			WAIT(cond, lock);
228  *		UNLOCK(lock);
229  *	}
230  * The second check is important, since "var" is protected by the atomic
231  * operation, not by the mutex, and can be changed just before sleeping.
232  * (The first "if" could be omitted, but this is also important in order to
233  * make the code efficient by avoiding the use of the mutex unless it is
234  * really necessary.)
235  */
236 
237 #define WRITER_ACTIVE	0x1
238 #define READER_INCR	0x2
239 
/*
 * Acquire 'rwl' for reading or writing, blocking as necessary.
 * Writers are served in FIFO ticket order; a new reader defers to any
 * pending writer (see the algorithm description above).  Always
 * returns ISC_R_SUCCESS.
 */
isc_result_t
isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	isc_int32_t cntflag;

	REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

	if (type == isc_rwlocktype_read) {
		if (rwl->write_requests != rwl->write_completions) {
			/*
			 * There is a waiting or active writer.  Re-check
			 * under the mutex before sleeping (see the
			 * "Implementation specific note" above).
			 */
			LOCK(&rwl->lock);
			if (rwl->write_requests != rwl->write_completions) {
				rwl->readers_waiting++;
				WAIT(&rwl->readable, &rwl->lock);
				rwl->readers_waiting--;
			}
			UNLOCK(&rwl->lock);
		}

		/* Announce ourselves as an (almost-ready) reader. */
		cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
		POST(cntflag);
		while (1) {
			if ((rwl->cnt_and_flag & WRITER_ACTIVE) == 0)
				break;

			/* A writer is still working */
			LOCK(&rwl->lock);
			rwl->readers_waiting++;
			if ((rwl->cnt_and_flag & WRITER_ACTIVE) != 0)
				WAIT(&rwl->readable, &rwl->lock);
			rwl->readers_waiting--;
			UNLOCK(&rwl->lock);

			/*
			 * Typically, the reader should be able to get a lock
			 * at this stage:
			 *   (1) there should have been no pending writer when
			 *       the reader was trying to increment the
			 *       counter; otherwise, the writer should be in
			 *       the waiting queue, preventing the reader from
			 *       proceeding to this point.
			 *   (2) once the reader increments the counter, no
			 *       more writer can get a lock.
			 * Still, it is possible another writer can work at
			 * this point, e.g. in the following scenario:
			 *   A previous writer unlocks the writer lock.
			 *   This reader proceeds to point (1).
			 *   A new writer appears, and gets a new lock before
			 *   the reader increments the counter.
			 *   The reader then increments the counter.
			 *   The previous writer notices there is a waiting
			 *   reader who is almost ready, and wakes it up.
			 * So, the reader needs to confirm whether it can now
			 * read explicitly (thus we loop).  Note that this is
			 * not an infinite process, since the reader has
			 * incremented the counter at this point.
			 */
		}

		/*
		 * If we are temporarily preferred to writers due to the writer
		 * quota, reset the condition (race among readers doesn't
		 * matter).
		 */
		rwl->write_granted = 0;
	} else {
		isc_int32_t prev_writer;

		/* enter the waiting queue, and wait for our turn */
		prev_writer = isc_atomic_xadd(&rwl->write_requests, 1);
		while (rwl->write_completions != prev_writer) {
			LOCK(&rwl->lock);
			if (rwl->write_completions != prev_writer) {
				WAIT(&rwl->writeable, &rwl->lock);
				UNLOCK(&rwl->lock);
				continue;
			}
			UNLOCK(&rwl->lock);
			break;
		}

		/*
		 * Our ticket is up: claim the lock once all readers have
		 * drained (cnt_and_flag must be exactly 0).
		 */
		while (1) {
			cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
						     WRITER_ACTIVE);
			if (cntflag == 0)
				break;

			/* Another active reader or writer is working. */
			LOCK(&rwl->lock);
			if (rwl->cnt_and_flag != 0)
				WAIT(&rwl->writeable, &rwl->lock);
			UNLOCK(&rwl->lock);
		}

		INSIST((rwl->cnt_and_flag & WRITER_ACTIVE) != 0);
		rwl->write_granted++;
	}

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

	return (ISC_R_SUCCESS);
}
349 
/*
 * Non-blocking acquisition attempt.  Returns ISC_R_LOCKBUSY when the
 * lock cannot be granted immediately, ISC_R_SUCCESS otherwise.
 */
isc_result_t
isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	isc_int32_t cntflag;

	REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

	if (type == isc_rwlocktype_read) {
		/* If a writer is waiting or working, we fail. */
		if (rwl->write_requests != rwl->write_completions)
			return (ISC_R_LOCKBUSY);

		/* Otherwise, be ready for reading. */
		cntflag = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
		if ((cntflag & WRITER_ACTIVE) != 0) {
			/*
			 * A writer is working.  We lose, and cancel the read
			 * request.
			 */
			cntflag = isc_atomic_xadd(&rwl->cnt_and_flag,
						  -READER_INCR);
			/*
			 * If no other readers are waiting and we've suspended
			 * new writers in this short period, wake them up.
			 */
			if (cntflag == READER_INCR &&
			    rwl->write_completions != rwl->write_requests) {
				LOCK(&rwl->lock);
				BROADCAST(&rwl->writeable);
				UNLOCK(&rwl->lock);
			}

			return (ISC_R_LOCKBUSY);
		}
	} else {
		/* Try locking without entering the waiting queue. */
		cntflag = isc_atomic_cmpxchg(&rwl->cnt_and_flag, 0,
					     WRITER_ACTIVE);
		if (cntflag != 0)
			return (ISC_R_LOCKBUSY);

		/*
		 * XXXJT: jump into the queue, possibly breaking the writer
		 * order.
		 */
		(void)isc_atomic_xadd(&rwl->write_completions, -1);

		rwl->write_granted++;
	}

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

	return (ISC_R_SUCCESS);
}
411 
/*
 * Try to upgrade a held read lock to a write lock without blocking.
 * Succeeds only when the caller is the sole reader and no writer is
 * active; otherwise returns ISC_R_LOCKBUSY and the read lock stays
 * held.
 */
isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
	isc_int32_t prevcnt;

	REQUIRE(VALID_RWLOCK(rwl));

	/* Try to acquire write access. */
	prevcnt = isc_atomic_cmpxchg(&rwl->cnt_and_flag,
				     READER_INCR, WRITER_ACTIVE);
	/*
	 * There must have been no writer, and there must have been at least
	 * one reader.
	 */
	INSIST((prevcnt & WRITER_ACTIVE) == 0 &&
	       (prevcnt & ~WRITER_ACTIVE) != 0);

	if (prevcnt == READER_INCR) {
		/*
		 * We are the only reader and have been upgraded.
		 * Now jump into the head of the writer waiting queue.
		 */
		(void)isc_atomic_xadd(&rwl->write_completions, -1);
	} else
		return (ISC_R_LOCKBUSY);

	return (ISC_R_SUCCESS);

}
440 
/*
 * Convert a held write lock into a read lock: become a reader, retire
 * our writer slot, and wake any readers blocked behind the write.
 */
void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {
	isc_int32_t prev_readers;

	REQUIRE(VALID_RWLOCK(rwl));

	/* Become an active reader. */
	prev_readers = isc_atomic_xadd(&rwl->cnt_and_flag, READER_INCR);
	/* We must have been a writer. */
	INSIST((prev_readers & WRITER_ACTIVE) != 0);

	/* Complete write */
	(void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
	(void)isc_atomic_xadd(&rwl->write_completions, 1);

	/* Resume other readers */
	LOCK(&rwl->lock);
	if (rwl->readers_waiting > 0)
		BROADCAST(&rwl->readable);
	UNLOCK(&rwl->lock);
}
462 
/*
 * Release 'rwl'.  'type' must match the mode in which the lock was
 * acquired.  The last departing reader wakes all queued writers (to
 * preserve FIFO order); a departing writer consults the write quota to
 * decide whether waiting readers or the next queued writer run first.
 */
isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	isc_int32_t prev_cnt;

	REQUIRE(VALID_RWLOCK(rwl));

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
#endif

	if (type == isc_rwlocktype_read) {
		prev_cnt = isc_atomic_xadd(&rwl->cnt_and_flag, -READER_INCR);

		/*
		 * If we're the last reader and any writers are waiting, wake
		 * them up.  We need to wake up all of them to ensure the
		 * FIFO order.
		 */
		if (prev_cnt == READER_INCR &&
		    rwl->write_completions != rwl->write_requests) {
			LOCK(&rwl->lock);
			BROADCAST(&rwl->writeable);
			UNLOCK(&rwl->lock);
		}
	} else {
		isc_boolean_t wakeup_writers = ISC_TRUE;

		/*
		 * Reset the flag, and (implicitly) tell other writers
		 * we are done.
		 */
		(void)isc_atomic_xadd(&rwl->cnt_and_flag, -WRITER_ACTIVE);
		(void)isc_atomic_xadd(&rwl->write_completions, 1);

		if (rwl->write_granted >= rwl->write_quota ||
		    rwl->write_requests == rwl->write_completions ||
		    (rwl->cnt_and_flag & ~WRITER_ACTIVE) != 0) {
			/*
			 * We have passed the write quota, no writer is
			 * waiting, or some readers are almost ready, pending
			 * possible writers.  Note that the last case can
			 * happen even if write_requests != write_completions
			 * (which means a new writer in the queue), so we need
			 * to catch the case explicitly.
			 */
			LOCK(&rwl->lock);
			if (rwl->readers_waiting > 0) {
				/* Readers get this turn; skip the writers. */
				wakeup_writers = ISC_FALSE;
				BROADCAST(&rwl->readable);
			}
			UNLOCK(&rwl->lock);
		}

		if (rwl->write_requests != rwl->write_completions &&
		    wakeup_writers) {
			LOCK(&rwl->lock);
			BROADCAST(&rwl->writeable);
			UNLOCK(&rwl->lock);
		}
	}

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_POSTUNLOCK, "postunlock"),
		   rwl, type);
#endif

	return (ISC_R_SUCCESS);
}
533 
534 #else /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */
535 
/*
 * Common lock-acquisition worker for the generic (mutex/condvar)
 * implementation.  If 'nonblock' is true, return ISC_R_LOCKBUSY rather
 * than waiting.  The 'skip' flag makes a new arrival wait at least once
 * when waiters of the same kind already exist, so it queues behind them
 * instead of overtaking.
 */
static isc_result_t
doit(isc_rwlock_t *rwl, isc_rwlocktype_t type, isc_boolean_t nonblock) {
	isc_boolean_t skip = ISC_FALSE;
	isc_boolean_t done = ISC_FALSE;
	isc_result_t result = ISC_R_SUCCESS;

	REQUIRE(VALID_RWLOCK(rwl));

	LOCK(&rwl->lock);

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_PRELOCK, "prelock"), rwl, type);
#endif

	if (type == isc_rwlocktype_read) {
		if (rwl->readers_waiting != 0)
			skip = ISC_TRUE;
		while (!done) {
			/*
			 * A read lock is granted when the lock is idle, or
			 * readers already hold it and either no writer is
			 * waiting or the read quota isn't exhausted yet.
			 */
			if (!skip &&
			    ((rwl->active == 0 ||
			      (rwl->type == isc_rwlocktype_read &&
			       (rwl->writers_waiting == 0 ||
				rwl->granted < rwl->read_quota)))))
			{
				rwl->type = isc_rwlocktype_read;
				rwl->active++;
				rwl->granted++;
				done = ISC_TRUE;
			} else if (nonblock) {
				result = ISC_R_LOCKBUSY;
				done = ISC_TRUE;
			} else {
				/* Wait; 'skip' applies to the first pass only. */
				skip = ISC_FALSE;
				rwl->readers_waiting++;
				WAIT(&rwl->readable, &rwl->lock);
				rwl->readers_waiting--;
			}
		}
	} else {
		if (rwl->writers_waiting != 0)
			skip = ISC_TRUE;
		while (!done) {
			/* A write lock requires the lock to be idle. */
			if (!skip && rwl->active == 0) {
				rwl->type = isc_rwlocktype_write;
				rwl->active = 1;
				rwl->granted++;
				done = ISC_TRUE;
			} else if (nonblock) {
				result = ISC_R_LOCKBUSY;
				done = ISC_TRUE;
			} else {
				skip = ISC_FALSE;
				rwl->writers_waiting++;
				WAIT(&rwl->writeable, &rwl->lock);
				rwl->writers_waiting--;
			}
		}
	}

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_POSTLOCK, "postlock"), rwl, type);
#endif

	UNLOCK(&rwl->lock);

	return (result);
}
605 
606 isc_result_t
isc_rwlock_lock(isc_rwlock_t * rwl,isc_rwlocktype_t type)607 isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
608 	return (doit(rwl, type, ISC_FALSE));
609 }
610 
611 isc_result_t
isc_rwlock_trylock(isc_rwlock_t * rwl,isc_rwlocktype_t type)612 isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
613 	return (doit(rwl, type, ISC_TRUE));
614 }
615 
616 isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t * rwl)617 isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
618 	isc_result_t result = ISC_R_SUCCESS;
619 
620 	REQUIRE(VALID_RWLOCK(rwl));
621 	LOCK(&rwl->lock);
622 	REQUIRE(rwl->type == isc_rwlocktype_read);
623 	REQUIRE(rwl->active != 0);
624 
625 	/* If we are the only reader then succeed. */
626 	if (rwl->active == 1) {
627 		rwl->original = (rwl->original == isc_rwlocktype_none) ?
628 				isc_rwlocktype_read : isc_rwlocktype_none;
629 		rwl->type = isc_rwlocktype_write;
630 	} else
631 		result = ISC_R_LOCKBUSY;
632 
633 	UNLOCK(&rwl->lock);
634 	return (result);
635 }
636 
/*
 * Convert a held write lock into a read lock.  If this downgrade
 * cancels a prior upgrade, the stored original mode is cleared; waiting
 * readers are resumed when quota and writer-waiting rules permit.
 */
void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {

	REQUIRE(VALID_RWLOCK(rwl));
	LOCK(&rwl->lock);
	REQUIRE(rwl->type == isc_rwlocktype_write);
	REQUIRE(rwl->active == 1);

	rwl->type = isc_rwlocktype_read;
	/* Toggle: record a fresh downgrade, or cancel a prior upgrade. */
	rwl->original = (rwl->original == isc_rwlocktype_none) ?
			isc_rwlocktype_write : isc_rwlocktype_none;
	/*
	 * Resume processing any read requests that were blocked when
	 * we upgraded.  (Note: 'original' was just toggled above, so
	 * this fires only when the downgrade cancelled an upgrade.)
	 */
	if (rwl->original == isc_rwlocktype_none &&
	    (rwl->writers_waiting == 0 || rwl->granted < rwl->read_quota) &&
	    rwl->readers_waiting > 0)
		BROADCAST(&rwl->readable);

	UNLOCK(&rwl->lock);
}
659 
/*
 * Release 'rwl' (generic implementation).  When the last holder
 * releases, choose who runs next: pending writers are preferred, but
 * the quotas bound how many consecutive grants one side receives
 * before the other side is woken.
 */
isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {

	REQUIRE(VALID_RWLOCK(rwl));
	LOCK(&rwl->lock);
	REQUIRE(rwl->type == type);

	UNUSED(type);

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_PREUNLOCK, "preunlock"), rwl, type);
#endif

	INSIST(rwl->active > 0);
	rwl->active--;
	if (rwl->active == 0) {
		/* Restore the pre-upgrade/downgrade mode, if any. */
		if (rwl->original != isc_rwlocktype_none) {
			rwl->type = rwl->original;
			rwl->original = isc_rwlocktype_none;
		}
		if (rwl->type == isc_rwlocktype_read) {
			rwl->granted = 0;
			if (rwl->writers_waiting > 0) {
				rwl->type = isc_rwlocktype_write;
				SIGNAL(&rwl->writeable);
			} else if (rwl->readers_waiting > 0) {
				/* Does this case ever happen? */
				BROADCAST(&rwl->readable);
			}
		} else {
			if (rwl->readers_waiting > 0) {
				if (rwl->writers_waiting > 0 &&
				    rwl->granted < rwl->write_quota) {
					/* Writer batch not exhausted yet. */
					SIGNAL(&rwl->writeable);
				} else {
					rwl->granted = 0;
					rwl->type = isc_rwlocktype_read;
					BROADCAST(&rwl->readable);
				}
			} else if (rwl->writers_waiting > 0) {
				rwl->granted = 0;
				SIGNAL(&rwl->writeable);
			} else {
				rwl->granted = 0;
			}
		}
	}
	INSIST(rwl->original == isc_rwlocktype_none);

#ifdef ISC_RWLOCK_TRACE
	print_lock(isc_msgcat_get(isc_msgcat, ISC_MSGSET_RWLOCK,
				  ISC_MSG_POSTUNLOCK, "postunlock"),
		   rwl, type);
#endif

	UNLOCK(&rwl->lock);

	return (ISC_R_SUCCESS);
}
720 
721 #endif /* ISC_PLATFORM_HAVEXADD && ISC_PLATFORM_HAVECMPXCHG */
722 #else /* ISC_PLATFORM_USETHREADS */
723 
724 isc_result_t
isc_rwlock_init(isc_rwlock_t * rwl,unsigned int read_quota,unsigned int write_quota)725 isc_rwlock_init(isc_rwlock_t *rwl, unsigned int read_quota,
726 		unsigned int write_quota)
727 {
728 	REQUIRE(rwl != NULL);
729 
730 	UNUSED(read_quota);
731 	UNUSED(write_quota);
732 
733 	rwl->type = isc_rwlocktype_read;
734 	rwl->active = 0;
735 	rwl->magic = RWLOCK_MAGIC;
736 
737 	return (ISC_R_SUCCESS);
738 }
739 
740 isc_result_t
isc_rwlock_lock(isc_rwlock_t * rwl,isc_rwlocktype_t type)741 isc_rwlock_lock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
742 	REQUIRE(VALID_RWLOCK(rwl));
743 
744 	if (type == isc_rwlocktype_read) {
745 		if (rwl->type != isc_rwlocktype_read && rwl->active != 0)
746 			return (ISC_R_LOCKBUSY);
747 		rwl->type = isc_rwlocktype_read;
748 		rwl->active++;
749 	} else {
750 		if (rwl->active != 0)
751 			return (ISC_R_LOCKBUSY);
752 		rwl->type = isc_rwlocktype_write;
753 		rwl->active = 1;
754 	}
755 	return (ISC_R_SUCCESS);
756 }
757 
/*
 * Without threads, a "blocking" lock never actually waits, so trylock
 * is identical to lock.
 */
isc_result_t
isc_rwlock_trylock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	return (isc_rwlock_lock(rwl, type));
}
762 
763 isc_result_t
isc_rwlock_tryupgrade(isc_rwlock_t * rwl)764 isc_rwlock_tryupgrade(isc_rwlock_t *rwl) {
765 	isc_result_t result = ISC_R_SUCCESS;
766 
767 	REQUIRE(VALID_RWLOCK(rwl));
768 	REQUIRE(rwl->type == isc_rwlocktype_read);
769 	REQUIRE(rwl->active != 0);
770 
771 	/* If we are the only reader then succeed. */
772 	if (rwl->active == 1)
773 		rwl->type = isc_rwlocktype_write;
774 	else
775 		result = ISC_R_LOCKBUSY;
776 	return (result);
777 }
778 
/*
 * Single-threaded build: downgrading is just flipping the type; the
 * caller must hold the sole (write) reference.
 */
void
isc_rwlock_downgrade(isc_rwlock_t *rwl) {

	REQUIRE(VALID_RWLOCK(rwl));
	REQUIRE(rwl->type == isc_rwlocktype_write);
	REQUIRE(rwl->active == 1);

	rwl->type = isc_rwlocktype_read;
}
788 
/*
 * Release one reference.  'type' must match the current lock mode.
 */
isc_result_t
isc_rwlock_unlock(isc_rwlock_t *rwl, isc_rwlocktype_t type) {
	REQUIRE(VALID_RWLOCK(rwl));
	REQUIRE(rwl->type == type);

	UNUSED(type);

	INSIST(rwl->active > 0);
	rwl->active--;

	return (ISC_R_SUCCESS);
}
801 
/*
 * Destroy 'rwl'; it must not be held.  Clearing the magic invalidates
 * further use.
 */
void
isc_rwlock_destroy(isc_rwlock_t *rwl) {
	REQUIRE(rwl != NULL);
	REQUIRE(rwl->active == 0);
	rwl->magic = 0;
}
808 
809 #endif /* ISC_PLATFORM_USETHREADS */
810