xref: /linux-6.15/fs/orangefs/waitqueue.c (revision 6ebcc3fc)
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  * (C) 2011 Omnibond Systems
4  *
5  * Changes by Acxiom Corporation to implement generic service_operation()
6  * function, Copyright Acxiom Corporation, 2005.
7  *
8  * See COPYING in top-level directory.
9  */
10 
11 /*
12  *  In-kernel waitqueue operations.
13  */
14 
15 #include "protocol.h"
16 #include "orangefs-kernel.h"
17 #include "orangefs-bufmap.h"
18 
/* Forward declarations for the two downcall wait helpers defined below. */
static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op);
static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op);
21 
22 /*
23  * What we do in this function is to walk the list of operations that are
24  * present in the request queue and mark them as purged.
25  * NOTE: This is called from the device close after client-core has
26  * guaranteed that no new operations could appear on the list since the
27  * client-core is anyway going to exit.
28  */
29 void purge_waiting_ops(void)
30 {
31 	struct orangefs_kernel_op_s *op;
32 
33 	spin_lock(&orangefs_request_list_lock);
34 	list_for_each_entry(op, &orangefs_request_list, list) {
35 		gossip_debug(GOSSIP_WAIT_DEBUG,
36 			     "pvfs2-client-core: purging op tag %llu %s\n",
37 			     llu(op->tag),
38 			     get_opname_string(op));
39 		spin_lock(&op->lock);
40 		set_op_state_purged(op);
41 		spin_unlock(&op->lock);
42 	}
43 	spin_unlock(&orangefs_request_list_lock);
44 }
45 
46 static inline void
47 add_op_to_request_list(struct orangefs_kernel_op_s *op)
48 {
49 	spin_lock(&orangefs_request_list_lock);
50 	spin_lock(&op->lock);
51 	set_op_state_waiting(op);
52 	list_add_tail(&op->list, &orangefs_request_list);
53 	spin_unlock(&orangefs_request_list_lock);
54 	spin_unlock(&op->lock);
55 	wake_up_interruptible(&orangefs_request_list_waitq);
56 }
57 
58 static inline
59 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
60 {
61 	spin_lock(&orangefs_request_list_lock);
62 	spin_lock(&op->lock);
63 	set_op_state_waiting(op);
64 
65 	list_add(&op->list, &orangefs_request_list);
66 	spin_unlock(&orangefs_request_list_lock);
67 	spin_unlock(&op->lock);
68 	wake_up_interruptible(&orangefs_request_list_waitq);
69 }
70 
71 /*
72  * submits a ORANGEFS operation and waits for it to complete
73  *
74  * Note op->downcall.status will contain the status of the operation (in
75  * errno format), whether provided by pvfs2-client or a result of failure to
76  * service the operation.  If the caller wishes to distinguish, then
77  * op->state can be checked to see if it was serviced or not.
78  *
79  * Returns contents of op->downcall.status for convenience
80  */
81 int service_operation(struct orangefs_kernel_op_s *op,
82 		      const char *op_name,
83 		      int flags)
84 {
85 	/* flags to modify behavior */
86 	sigset_t orig_sigset;
87 	int ret = 0;
88 
89 	DEFINE_WAIT(wait_entry);
90 
91 	op->upcall.tgid = current->tgid;
92 	op->upcall.pid = current->pid;
93 
94 retry_servicing:
95 	op->downcall.status = 0;
96 	gossip_debug(GOSSIP_WAIT_DEBUG,
97 		     "orangefs: service_operation: %s %p\n",
98 		     op_name,
99 		     op);
100 	gossip_debug(GOSSIP_WAIT_DEBUG,
101 		     "orangefs: operation posted by process: %s, pid: %i\n",
102 		     current->comm,
103 		     current->pid);
104 
105 	/* mask out signals if this operation is not to be interrupted */
106 	if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
107 		orangefs_block_signals(&orig_sigset);
108 
109 	if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
110 		ret = mutex_lock_interruptible(&request_mutex);
111 		/*
112 		 * check to see if we were interrupted while waiting for
113 		 * semaphore
114 		 */
115 		if (ret < 0) {
116 			if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
117 				orangefs_set_signals(&orig_sigset);
118 			op->downcall.status = ret;
119 			gossip_debug(GOSSIP_WAIT_DEBUG,
120 				     "orangefs: service_operation interrupted.\n");
121 			return ret;
122 		}
123 	}
124 
125 	gossip_debug(GOSSIP_WAIT_DEBUG,
126 		     "%s:About to call is_daemon_in_service().\n",
127 		     __func__);
128 
129 	if (is_daemon_in_service() < 0) {
130 		/*
131 		 * By incrementing the per-operation attempt counter, we
132 		 * directly go into the timeout logic while waiting for
133 		 * the matching downcall to be read
134 		 */
135 		gossip_debug(GOSSIP_WAIT_DEBUG,
136 			     "%s:client core is NOT in service(%d).\n",
137 			     __func__,
138 			     is_daemon_in_service());
139 		op->attempts++;
140 	}
141 
142 	/* queue up the operation */
143 	if (flags & ORANGEFS_OP_PRIORITY) {
144 		add_priority_op_to_request_list(op);
145 	} else {
146 		gossip_debug(GOSSIP_WAIT_DEBUG,
147 			     "%s:About to call add_op_to_request_list().\n",
148 			     __func__);
149 		add_op_to_request_list(op);
150 	}
151 
152 	if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
153 		mutex_unlock(&request_mutex);
154 
155 	/*
156 	 * If we are asked to service an asynchronous operation from
157 	 * VFS perspective, we are done.
158 	 */
159 	if (flags & ORANGEFS_OP_ASYNC)
160 		return 0;
161 
162 	if (flags & ORANGEFS_OP_CANCELLATION) {
163 		gossip_debug(GOSSIP_WAIT_DEBUG,
164 			     "%s:"
165 			     "About to call wait_for_cancellation_downcall.\n",
166 			     __func__);
167 		ret = wait_for_cancellation_downcall(op);
168 	} else {
169 		ret = wait_for_matching_downcall(op);
170 	}
171 
172 	if (ret < 0) {
173 		/* failed to get matching downcall */
174 		if (ret == -ETIMEDOUT) {
175 			gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
176 				   op_name);
177 		}
178 		op->downcall.status = ret;
179 	} else {
180 		/* got matching downcall; make sure status is in errno format */
181 		op->downcall.status =
182 		    orangefs_normalize_to_errno(op->downcall.status);
183 		ret = op->downcall.status;
184 	}
185 
186 	if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
187 		orangefs_set_signals(&orig_sigset);
188 
189 	BUG_ON(ret != op->downcall.status);
190 	/* retry if operation has not been serviced and if requested */
191 	if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
192 		gossip_debug(GOSSIP_WAIT_DEBUG,
193 			     "orangefs: tag %llu (%s)"
194 			     " -- operation to be retried (%d attempt)\n",
195 			     llu(op->tag),
196 			     op_name,
197 			     op->attempts + 1);
198 
199 		if (!op->uses_shared_memory)
200 			/*
201 			 * this operation doesn't use the shared memory
202 			 * system
203 			 */
204 			goto retry_servicing;
205 
206 		/* op uses shared memory */
207 		if (orangefs_get_bufmap_init() == 0) {
208 			WARN_ON(1);
209 			/*
210 			 * This operation uses the shared memory system AND
211 			 * the system is not yet ready. This situation occurs
212 			 * when the client-core is restarted AND there were
213 			 * operations waiting to be processed or were already
214 			 * in process.
215 			 */
216 			gossip_debug(GOSSIP_WAIT_DEBUG,
217 				     "uses_shared_memory is true.\n");
218 			gossip_debug(GOSSIP_WAIT_DEBUG,
219 				     "Client core in-service status(%d).\n",
220 				     is_daemon_in_service());
221 			gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
222 				     orangefs_get_bufmap_init());
223 			gossip_debug(GOSSIP_WAIT_DEBUG,
224 				     "operation's status is 0x%0x.\n",
225 				     op->op_state);
226 
227 			/*
228 			 * let process sleep for a few seconds so shared
229 			 * memory system can be initialized.
230 			 */
231 			prepare_to_wait(&orangefs_bufmap_init_waitq,
232 					&wait_entry,
233 					TASK_INTERRUPTIBLE);
234 
235 			/*
236 			 * Wait for orangefs_bufmap_initialize() to wake me up
237 			 * within the allotted time.
238 			 */
239 			ret = schedule_timeout(
240 				ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ);
241 
242 			gossip_debug(GOSSIP_WAIT_DEBUG,
243 				     "Value returned from schedule_timeout:"
244 				     "%d.\n",
245 				     ret);
246 			gossip_debug(GOSSIP_WAIT_DEBUG,
247 				     "Is shared memory available? (%d).\n",
248 				     orangefs_get_bufmap_init());
249 
250 			finish_wait(&orangefs_bufmap_init_waitq, &wait_entry);
251 
252 			if (orangefs_get_bufmap_init() == 0) {
253 				gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
254 					   __func__,
255 					   ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
256 					   get_opname_string(op));
257 				return -EIO;
258 			}
259 
260 			/*
261 			 * Return to the calling function and re-populate a
262 			 * shared memory buffer.
263 			 */
264 			return -EAGAIN;
265 		}
266 	}
267 
268 	gossip_debug(GOSSIP_WAIT_DEBUG,
269 		     "orangefs: service_operation %s returning: %d for %p.\n",
270 		     op_name,
271 		     ret,
272 		     op);
273 	return ret;
274 }
275 
276 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
277 {
278 	/*
279 	 * handle interrupted cases depending on what state we were in when
280 	 * the interruption is detected.  there is a coarse grained lock
281 	 * across the operation.
282 	 *
283 	 * Called with op->lock held.
284 	 */
285 	op->op_state |= OP_VFS_STATE_GIVEN_UP;
286 
287 	if (op_state_waiting(op)) {
288 		/*
289 		 * upcall hasn't been read; remove op from upcall request
290 		 * list.
291 		 */
292 		spin_unlock(&op->lock);
293 		spin_lock(&orangefs_request_list_lock);
294 		list_del(&op->list);
295 		spin_unlock(&orangefs_request_list_lock);
296 		gossip_debug(GOSSIP_WAIT_DEBUG,
297 			     "Interrupted: Removed op %p from request_list\n",
298 			     op);
299 	} else if (op_state_in_progress(op)) {
300 		/* op must be removed from the in progress htable */
301 		spin_unlock(&op->lock);
302 		spin_lock(&htable_ops_in_progress_lock);
303 		list_del(&op->list);
304 		spin_unlock(&htable_ops_in_progress_lock);
305 		gossip_debug(GOSSIP_WAIT_DEBUG,
306 			     "Interrupted: Removed op %p"
307 			     " from htable_ops_in_progress\n",
308 			     op);
309 	} else if (!op_state_serviced(op)) {
310 		spin_unlock(&op->lock);
311 		gossip_err("interrupted operation is in a weird state 0x%x\n",
312 			   op->op_state);
313 	} else {
314 		/*
315 		 * It is not intended for execution to flow here,
316 		 * but having this unlock here makes sparse happy.
317 		 */
318 		gossip_err("%s: can't get here.\n", __func__);
319 		spin_unlock(&op->lock);
320 	}
321 }
322 
323 /*
324  * sleeps on waitqueue waiting for matching downcall.
325  * if client-core finishes servicing, then we are good to go.
326  * else if client-core exits, we get woken up here, and retry with a timeout
327  *
328  * Post when this call returns to the caller, the specified op will no
329  * longer be on any list or htable.
330  *
331  * Returns 0 on success and -errno on failure
332  * Errors are:
333  * EAGAIN in case we want the caller to requeue and try again..
334  * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
335  * operation since client-core seems to be exiting too often
336  * or if we were interrupted.
337  */
338 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
339 {
340 	int ret = -EINVAL;
341 	DEFINE_WAIT(wait_entry);
342 
343 	while (1) {
344 		spin_lock(&op->lock);
345 		prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
346 		if (op_state_serviced(op)) {
347 			spin_unlock(&op->lock);
348 			ret = 0;
349 			break;
350 		}
351 
352 		if (unlikely(signal_pending(current))) {
353 			gossip_debug(GOSSIP_WAIT_DEBUG,
354 				     "*** %s:"
355 				     " operation interrupted by a signal (tag "
356 				     "%llu, op %p)\n",
357 				     __func__,
358 				     llu(op->tag),
359 				     op);
360 			orangefs_clean_up_interrupted_operation(op);
361 			ret = -EINTR;
362 			break;
363 		}
364 
365 		/*
366 		 * if this was our first attempt and client-core
367 		 * has not purged our operation, we are happy to
368 		 * simply wait
369 		 */
370 		if (op->attempts == 0 && !op_state_purged(op)) {
371 			spin_unlock(&op->lock);
372 			schedule();
373 		} else {
374 			spin_unlock(&op->lock);
375 			/*
376 			 * subsequent attempts, we retry exactly once
377 			 * with timeouts
378 			 */
379 			if (!schedule_timeout(op_timeout_secs * HZ)) {
380 				gossip_debug(GOSSIP_WAIT_DEBUG,
381 					     "*** %s:"
382 					     " operation timed out (tag"
383 					     " %llu, %p, att %d)\n",
384 					     __func__,
385 					     llu(op->tag),
386 					     op,
387 					     op->attempts);
388 				ret = -ETIMEDOUT;
389 				spin_lock(&op->lock);
390 				orangefs_clean_up_interrupted_operation(op);
391 				break;
392 			}
393 		}
394 		spin_lock(&op->lock);
395 		op->attempts++;
396 		/*
397 		 * if the operation was purged in the meantime, it
398 		 * is better to requeue it afresh but ensure that
399 		 * we have not been purged repeatedly. This could
400 		 * happen if client-core crashes when an op
401 		 * is being serviced, so we requeue the op, client
402 		 * core crashes again so we requeue the op, client
403 		 * core starts, and so on...
404 		 */
405 		if (op_state_purged(op)) {
406 			ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
407 				 -EAGAIN :
408 				 -EIO;
409 			gossip_debug(GOSSIP_WAIT_DEBUG,
410 				     "*** %s:"
411 				     " operation purged (tag "
412 				     "%llu, %p, att %d)\n",
413 				     __func__,
414 				     llu(op->tag),
415 				     op,
416 				     op->attempts);
417 			orangefs_clean_up_interrupted_operation(op);
418 			break;
419 		}
420 		spin_unlock(&op->lock);
421 	}
422 
423 	spin_lock(&op->lock);
424 	finish_wait(&op->waitq, &wait_entry);
425 	spin_unlock(&op->lock);
426 
427 	return ret;
428 }
429 
430 /*
431  * similar to wait_for_matching_downcall(), but used in the special case
432  * of I/O cancellations.
433  *
434  * Note we need a special wait function because if this is called we already
435  *      know that a signal is pending in current and need to service the
436  *      cancellation upcall anyway.  the only way to exit this is to either
437  *      timeout or have the cancellation be serviced properly.
438  */
439 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
440 {
441 	int ret = -EINVAL;
442 	DEFINE_WAIT(wait_entry);
443 
444 	while (1) {
445 		spin_lock(&op->lock);
446 		prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
447 		if (op_state_serviced(op)) {
448 			gossip_debug(GOSSIP_WAIT_DEBUG,
449 				     "%s:op-state is SERVICED.\n",
450 				     __func__);
451 			spin_unlock(&op->lock);
452 			ret = 0;
453 			break;
454 		}
455 
456 		if (signal_pending(current)) {
457 			gossip_debug(GOSSIP_WAIT_DEBUG,
458 				     "%s:operation interrupted by a signal (tag"
459 				     " %llu, op %p)\n",
460 				     __func__,
461 				     llu(op->tag),
462 				     op);
463 			orangefs_clean_up_interrupted_operation(op);
464 			ret = -EINTR;
465 			break;
466 		}
467 
468 		gossip_debug(GOSSIP_WAIT_DEBUG,
469 			     "%s:About to call schedule_timeout.\n",
470 			     __func__);
471 		spin_unlock(&op->lock);
472 		ret = schedule_timeout(op_timeout_secs * HZ);
473 
474 		gossip_debug(GOSSIP_WAIT_DEBUG,
475 			     "%s:Value returned from schedule_timeout(%d).\n",
476 			     __func__,
477 			     ret);
478 		if (!ret) {
479 			gossip_debug(GOSSIP_WAIT_DEBUG,
480 				     "%s:*** operation timed out: %p\n",
481 				     __func__,
482 				     op);
483 			spin_lock(&op->lock);
484 			orangefs_clean_up_interrupted_operation(op);
485 			ret = -ETIMEDOUT;
486 			break;
487 		}
488 
489 		gossip_debug(GOSSIP_WAIT_DEBUG,
490 			     "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
491 			     __func__);
492 		ret = -ETIMEDOUT;
493 		break;
494 	}
495 
496 	spin_lock(&op->lock);
497 	finish_wait(&op->waitq, &wait_entry);
498 	spin_unlock(&op->lock);
499 
500 	gossip_debug(GOSSIP_WAIT_DEBUG,
501 		     "%s:returning ret(%d)\n",
502 		     __func__,
503 		     ret);
504 
505 	return ret;
506 }
507