1 /* 2 * (C) 2001 Clemson University and The University of Chicago 3 * (C) 2011 Omnibond Systems 4 * 5 * Changes by Acxiom Corporation to implement generic service_operation() 6 * function, Copyright Acxiom Corporation, 2005. 7 * 8 * See COPYING in top-level directory. 9 */ 10 11 /* 12 * In-kernel waitqueue operations. 13 */ 14 15 #include "protocol.h" 16 #include "orangefs-kernel.h" 17 #include "orangefs-bufmap.h" 18 19 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *); 20 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); 21 22 /* 23 * What we do in this function is to walk the list of operations that are 24 * present in the request queue and mark them as purged. 25 * NOTE: This is called from the device close after client-core has 26 * guaranteed that no new operations could appear on the list since the 27 * client-core is anyway going to exit. 28 */ 29 void purge_waiting_ops(void) 30 { 31 struct orangefs_kernel_op_s *op; 32 33 spin_lock(&orangefs_request_list_lock); 34 list_for_each_entry(op, &orangefs_request_list, list) { 35 gossip_debug(GOSSIP_WAIT_DEBUG, 36 "pvfs2-client-core: purging op tag %llu %s\n", 37 llu(op->tag), 38 get_opname_string(op)); 39 spin_lock(&op->lock); 40 set_op_state_purged(op); 41 spin_unlock(&op->lock); 42 } 43 spin_unlock(&orangefs_request_list_lock); 44 } 45 46 static inline void 47 add_op_to_request_list(struct orangefs_kernel_op_s *op) 48 { 49 spin_lock(&orangefs_request_list_lock); 50 spin_lock(&op->lock); 51 set_op_state_waiting(op); 52 list_add_tail(&op->list, &orangefs_request_list); 53 spin_unlock(&orangefs_request_list_lock); 54 spin_unlock(&op->lock); 55 wake_up_interruptible(&orangefs_request_list_waitq); 56 } 57 58 static inline 59 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op) 60 { 61 spin_lock(&orangefs_request_list_lock); 62 spin_lock(&op->lock); 63 set_op_state_waiting(op); 64 65 list_add(&op->list, &orangefs_request_list); 66 spin_unlock(&orangefs_request_list_lock); 67 spin_unlock(&op->lock); 68 wake_up_interruptible(&orangefs_request_list_waitq); 69 } 70 71 /* 72 * submits a ORANGEFS operation and waits for it to complete 73 * 74 * Note op->downcall.status will contain the status of the operation (in 75 * errno format), whether provided by pvfs2-client or a result of failure to 76 * service the operation. If the caller wishes to distinguish, then 77 * op->state can be checked to see if it was serviced or not. 78 * 79 * Returns contents of op->downcall.status for convenience 80 */ 81 int service_operation(struct orangefs_kernel_op_s *op, 82 const char *op_name, 83 int flags) 84 { 85 /* flags to modify behavior */ 86 sigset_t orig_sigset; 87 int ret = 0; 88 89 DEFINE_WAIT(wait_entry); 90 91 op->upcall.tgid = current->tgid; 92 op->upcall.pid = current->pid; 93 94 retry_servicing: 95 op->downcall.status = 0; 96 gossip_debug(GOSSIP_WAIT_DEBUG, 97 "orangefs: service_operation: %s %p\n", 98 op_name, 99 op); 100 gossip_debug(GOSSIP_WAIT_DEBUG, 101 "orangefs: operation posted by process: %s, pid: %i\n", 102 current->comm, 103 current->pid); 104 105 /* mask out signals if this operation is not to be interrupted */ 106 if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) 107 orangefs_block_signals(&orig_sigset); 108 109 if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) { 110 ret = mutex_lock_interruptible(&request_mutex); 111 /* 112 * check to see if we were interrupted while waiting for 113 * semaphore 114 */ 115 if (ret < 0) { 116 if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) 117 orangefs_set_signals(&orig_sigset); 118 op->downcall.status = ret; 119 gossip_debug(GOSSIP_WAIT_DEBUG, 120 "orangefs: service_operation interrupted.\n"); 121 return ret; 122 } 123 } 124 125 gossip_debug(GOSSIP_WAIT_DEBUG, 126 "%s:About to call is_daemon_in_service().\n", 127 __func__); 128 129 if (is_daemon_in_service() < 0) { 130 /* 131 * By incrementing the per-operation attempt counter, we 132 * directly go into the timeout logic while waiting for 133 * the matching downcall to be read 134 */ 135 gossip_debug(GOSSIP_WAIT_DEBUG, 136 "%s:client core is NOT in service(%d).\n", 137 __func__, 138 is_daemon_in_service()); 139 op->attempts++; 140 } 141 142 /* queue up the operation */ 143 if (flags & ORANGEFS_OP_PRIORITY) { 144 add_priority_op_to_request_list(op); 145 } else { 146 gossip_debug(GOSSIP_WAIT_DEBUG, 147 "%s:About to call add_op_to_request_list().\n", 148 __func__); 149 add_op_to_request_list(op); 150 } 151 152 if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) 153 mutex_unlock(&request_mutex); 154 155 /* 156 * If we are asked to service an asynchronous operation from 157 * VFS perspective, we are done. 158 */ 159 if (flags & ORANGEFS_OP_ASYNC) 160 return 0; 161 162 if (flags & ORANGEFS_OP_CANCELLATION) { 163 gossip_debug(GOSSIP_WAIT_DEBUG, 164 "%s:" 165 "About to call wait_for_cancellation_downcall.\n", 166 __func__); 167 ret = wait_for_cancellation_downcall(op); 168 } else { 169 ret = wait_for_matching_downcall(op); 170 } 171 172 if (ret < 0) { 173 /* failed to get matching downcall */ 174 if (ret == -ETIMEDOUT) { 175 gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n", 176 op_name); 177 } 178 op->downcall.status = ret; 179 } else { 180 /* got matching downcall; make sure status is in errno format */ 181 op->downcall.status = 182 orangefs_normalize_to_errno(op->downcall.status); 183 ret = op->downcall.status; 184 } 185 186 if (!(flags & ORANGEFS_OP_INTERRUPTIBLE)) 187 orangefs_set_signals(&orig_sigset); 188 189 BUG_ON(ret != op->downcall.status); 190 /* retry if operation has not been serviced and if requested */ 191 if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { 192 gossip_debug(GOSSIP_WAIT_DEBUG, 193 "orangefs: tag %llu (%s)" 194 " -- operation to be retried (%d attempt)\n", 195 llu(op->tag), 196 op_name, 197 op->attempts + 1); 198 199 if (!op->uses_shared_memory) 200 /* 201 * this operation doesn't use the shared memory 202 * system 203 */ 204 goto retry_servicing; 205 206 /* op uses shared memory */ 207 if (orangefs_get_bufmap_init() == 0) { 208 WARN_ON(1); 209 /* 210 * This operation uses the shared memory system AND 211 * the system is not yet ready. This situation occurs 212 * when the client-core is restarted AND there were 213 * operations waiting to be processed or were already 214 * in process. 215 */ 216 gossip_debug(GOSSIP_WAIT_DEBUG, 217 "uses_shared_memory is true.\n"); 218 gossip_debug(GOSSIP_WAIT_DEBUG, 219 "Client core in-service status(%d).\n", 220 is_daemon_in_service()); 221 gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n", 222 orangefs_get_bufmap_init()); 223 gossip_debug(GOSSIP_WAIT_DEBUG, 224 "operation's status is 0x%0x.\n", 225 op->op_state); 226 227 /* 228 * let process sleep for a few seconds so shared 229 * memory system can be initialized. 230 */ 231 prepare_to_wait(&orangefs_bufmap_init_waitq, 232 &wait_entry, 233 TASK_INTERRUPTIBLE); 234 235 /* 236 * Wait for orangefs_bufmap_initialize() to wake me up 237 * within the allotted time. 238 */ 239 ret = schedule_timeout( 240 ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ); 241 242 gossip_debug(GOSSIP_WAIT_DEBUG, 243 "Value returned from schedule_timeout:" 244 "%d.\n", 245 ret); 246 gossip_debug(GOSSIP_WAIT_DEBUG, 247 "Is shared memory available? (%d).\n", 248 orangefs_get_bufmap_init()); 249 250 finish_wait(&orangefs_bufmap_init_waitq, &wait_entry); 251 252 if (orangefs_get_bufmap_init() == 0) { 253 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted. Aborting user's request(%s).\n", 254 __func__, 255 ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS, 256 get_opname_string(op)); 257 return -EIO; 258 } 259 260 /* 261 * Return to the calling function and re-populate a 262 * shared memory buffer. 263 */ 264 return -EAGAIN; 265 } 266 } 267 268 gossip_debug(GOSSIP_WAIT_DEBUG, 269 "orangefs: service_operation %s returning: %d for %p.\n", 270 op_name, 271 ret, 272 op); 273 return ret; 274 } 275 276 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) 277 { 278 /* 279 * handle interrupted cases depending on what state we were in when 280 * the interruption is detected. there is a coarse grained lock 281 * across the operation. 282 * 283 * Called with op->lock held. 284 */ 285 op->op_state |= OP_VFS_STATE_GIVEN_UP; 286 287 if (op_state_waiting(op)) { 288 /* 289 * upcall hasn't been read; remove op from upcall request 290 * list. 291 */ 292 spin_unlock(&op->lock); 293 spin_lock(&orangefs_request_list_lock); 294 list_del(&op->list); 295 spin_unlock(&orangefs_request_list_lock); 296 gossip_debug(GOSSIP_WAIT_DEBUG, 297 "Interrupted: Removed op %p from request_list\n", 298 op); 299 } else if (op_state_in_progress(op)) { 300 /* op must be removed from the in progress htable */ 301 spin_unlock(&op->lock); 302 spin_lock(&htable_ops_in_progress_lock); 303 list_del(&op->list); 304 spin_unlock(&htable_ops_in_progress_lock); 305 gossip_debug(GOSSIP_WAIT_DEBUG, 306 "Interrupted: Removed op %p" 307 " from htable_ops_in_progress\n", 308 op); 309 } else if (!op_state_serviced(op)) { 310 spin_unlock(&op->lock); 311 gossip_err("interrupted operation is in a weird state 0x%x\n", 312 op->op_state); 313 } else { 314 /* 315 * It is not intended for execution to flow here, 316 * but having this unlock here makes sparse happy. 317 */ 318 gossip_err("%s: can't get here.\n", __func__); 319 spin_unlock(&op->lock); 320 } 321 } 322 323 /* 324 * sleeps on waitqueue waiting for matching downcall. 325 * if client-core finishes servicing, then we are good to go. 326 * else if client-core exits, we get woken up here, and retry with a timeout 327 * 328 * Post when this call returns to the caller, the specified op will no 329 * longer be on any list or htable. 330 * 331 * Returns 0 on success and -errno on failure 332 * Errors are: 333 * EAGAIN in case we want the caller to requeue and try again.. 334 * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this 335 * operation since client-core seems to be exiting too often 336 * or if we were interrupted. 337 */ 338 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) 339 { 340 int ret = -EINVAL; 341 DEFINE_WAIT(wait_entry); 342 343 while (1) { 344 spin_lock(&op->lock); 345 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); 346 if (op_state_serviced(op)) { 347 spin_unlock(&op->lock); 348 ret = 0; 349 break; 350 } 351 352 if (unlikely(signal_pending(current))) { 353 gossip_debug(GOSSIP_WAIT_DEBUG, 354 "*** %s:" 355 " operation interrupted by a signal (tag " 356 "%llu, op %p)\n", 357 __func__, 358 llu(op->tag), 359 op); 360 orangefs_clean_up_interrupted_operation(op); 361 ret = -EINTR; 362 break; 363 } 364 365 /* 366 * if this was our first attempt and client-core 367 * has not purged our operation, we are happy to 368 * simply wait 369 */ 370 if (op->attempts == 0 && !op_state_purged(op)) { 371 spin_unlock(&op->lock); 372 schedule(); 373 } else { 374 spin_unlock(&op->lock); 375 /* 376 * subsequent attempts, we retry exactly once 377 * with timeouts 378 */ 379 if (!schedule_timeout(op_timeout_secs * HZ)) { 380 gossip_debug(GOSSIP_WAIT_DEBUG, 381 "*** %s:" 382 " operation timed out (tag" 383 " %llu, %p, att %d)\n", 384 __func__, 385 llu(op->tag), 386 op, 387 op->attempts); 388 ret = -ETIMEDOUT; 389 spin_lock(&op->lock); 390 orangefs_clean_up_interrupted_operation(op); 391 break; 392 } 393 } 394 spin_lock(&op->lock); 395 op->attempts++; 396 /* 397 * if the operation was purged in the meantime, it 398 * is better to requeue it afresh but ensure that 399 * we have not been purged repeatedly. This could 400 * happen if client-core crashes when an op 401 * is being serviced, so we requeue the op, client 402 * core crashes again so we requeue the op, client 403 * core starts, and so on... 404 */ 405 if (op_state_purged(op)) { 406 ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ? 407 -EAGAIN : 408 -EIO; 409 gossip_debug(GOSSIP_WAIT_DEBUG, 410 "*** %s:" 411 " operation purged (tag " 412 "%llu, %p, att %d)\n", 413 __func__, 414 llu(op->tag), 415 op, 416 op->attempts); 417 orangefs_clean_up_interrupted_operation(op); 418 break; 419 } 420 spin_unlock(&op->lock); 421 } 422 423 spin_lock(&op->lock); 424 finish_wait(&op->waitq, &wait_entry); 425 spin_unlock(&op->lock); 426 427 return ret; 428 } 429 430 /* 431 * similar to wait_for_matching_downcall(), but used in the special case 432 * of I/O cancellations. 433 * 434 * Note we need a special wait function because if this is called we already 435 * know that a signal is pending in current and need to service the 436 * cancellation upcall anyway. the only way to exit this is to either 437 * timeout or have the cancellation be serviced properly. 438 */ 439 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) 440 { 441 int ret = -EINVAL; 442 DEFINE_WAIT(wait_entry); 443 444 while (1) { 445 spin_lock(&op->lock); 446 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); 447 if (op_state_serviced(op)) { 448 gossip_debug(GOSSIP_WAIT_DEBUG, 449 "%s:op-state is SERVICED.\n", 450 __func__); 451 spin_unlock(&op->lock); 452 ret = 0; 453 break; 454 } 455 456 if (signal_pending(current)) { 457 gossip_debug(GOSSIP_WAIT_DEBUG, 458 "%s:operation interrupted by a signal (tag" 459 " %llu, op %p)\n", 460 __func__, 461 llu(op->tag), 462 op); 463 orangefs_clean_up_interrupted_operation(op); 464 ret = -EINTR; 465 break; 466 } 467 468 gossip_debug(GOSSIP_WAIT_DEBUG, 469 "%s:About to call schedule_timeout.\n", 470 __func__); 471 spin_unlock(&op->lock); 472 ret = schedule_timeout(op_timeout_secs * HZ); 473 474 gossip_debug(GOSSIP_WAIT_DEBUG, 475 "%s:Value returned from schedule_timeout(%d).\n", 476 __func__, 477 ret); 478 if (!ret) { 479 gossip_debug(GOSSIP_WAIT_DEBUG, 480 "%s:*** operation timed out: %p\n", 481 __func__, 482 op); 483 spin_lock(&op->lock); 484 orangefs_clean_up_interrupted_operation(op); 485 ret = -ETIMEDOUT; 486 break; 487 } 488 489 gossip_debug(GOSSIP_WAIT_DEBUG, 490 "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n", 491 __func__); 492 ret = -ETIMEDOUT; 493 break; 494 } 495 496 spin_lock(&op->lock); 497 finish_wait(&op->waitq, &wait_entry); 498 spin_unlock(&op->lock); 499 500 gossip_debug(GOSSIP_WAIT_DEBUG, 501 "%s:returning ret(%d)\n", 502 __func__, 503 ret); 504 505 return ret; 506 } 507