fs/orangefs/waitqueue.c

   1 /*
   2  * (C) 2001 Clemson University and The University of Chicago
   3  * (C) 2011 Omnibond Systems
   4  *
   5  * Changes by Acxiom Corporation to implement generic service_operation()
   6  * function, Copyright Acxiom Corporation, 2005.
   7  *
   8  * See COPYING in top-level directory.
   9  */
  10
  11 /*
  12  *  In-kernel waitqueue operations.
  13  */
  14
  15 #include "protocol.h"
  16 #include "orangefs-kernel.h"
  17 #include "orangefs-bufmap.h"
  18
  19 /*
  20  * What we do in this function is to walk the list of operations that are
  21  * present in the request queue and mark them as purged.
  22  * NOTE: This is called from the device close after client-core has
  23  * guaranteed that no new operations could appear on the list since the
  24  * client-core is anyway going to exit.
  25  */
  26 void purge_waiting_ops(void)
  27 {
  28         struct orangefs_kernel_op_s *op;
  29
  30         spin_lock(&orangefs_request_list_lock);
  31         list_for_each_entry(op, &orangefs_request_list, list) {
  32                 gossip_debug(GOSSIP_WAIT_DEBUG,
  33                              "pvfs2-client-core: purging op tag %llu %s\n",
  34                              llu(op->tag),
  35                              get_opname_string(op));
  36                 spin_lock(&op->lock);
  37                 set_op_state_purged(op);
  38                 spin_unlock(&op->lock);
  39                 wake_up_interruptible(&op->waitq);
  40         }
  41         spin_unlock(&orangefs_request_list_lock);
  42 }
  43
  44 /*
  45  * submits a ORANGEFS operation and waits for it to complete
  46  *
  47  * Note op->downcall.status will contain the status of the operation (in
  48  * errno format), whether provided by pvfs2-client or a result of failure to
  49  * service the operation.  If the caller wishes to distinguish, then
  50  * op->state can be checked to see if it was serviced or not.
  51  *
  52  * Returns contents of op->downcall.status for convenience
  53  */
  54 int service_operation(struct orangefs_kernel_op_s *op,
  55                       const char *op_name,
  56                       int flags)
  57 {
  58         /* flags to modify behavior */
  59         sigset_t orig_sigset;
  60         int ret = 0;
  61
  62         /* irqflags and wait_entry are only used IF the client-core aborts */
  63         unsigned long irqflags;
  64
  65         DECLARE_WAITQUEUE(wait_entry, current);
  66
  67         op->upcall.tgid = current->tgid;
  68         op->upcall.pid = current->pid;
  69
  70 retry_servicing:
  71         op->downcall.status = 0;
  72         gossip_debug(GOSSIP_WAIT_DEBUG,
  73                      "orangefs: service_operation: %s %p\n",
  74                      op_name,
  75                      op);
  76         gossip_debug(GOSSIP_WAIT_DEBUG,
  77                      "orangefs: operation posted by process: %s, pid: %i\n",
  78                      current->comm,
  79                      current->pid);
  80
  81         /* mask out signals if this operation is not to be interrupted */
  82         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
  83                 block_signals(&orig_sigset);
  84
  85         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
  86                 ret = mutex_lock_interruptible(&request_mutex);
  87                 /*
  88                  * check to see if we were interrupted while waiting for
  89                  * semaphore
  90                  */
  91                 if (ret < 0) {
  92                         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
  93                                 set_signals(&orig_sigset);
  94                         op->downcall.status = ret;
  95                         gossip_debug(GOSSIP_WAIT_DEBUG,
  96                                      "orangefs: service_operation interrupted.\n");
  97                         return ret;
  98                 }
  99         }
 100
 101         gossip_debug(GOSSIP_WAIT_DEBUG,
 102                      "%s:About to call is_daemon_in_service().\n",
 103                      __func__);
 104
 105         if (is_daemon_in_service() < 0) {
 106                 /*
 107                  * By incrementing the per-operation attempt counter, we
 108                  * directly go into the timeout logic while waiting for
 109                  * the matching downcall to be read
 110                  */
 111                 gossip_debug(GOSSIP_WAIT_DEBUG,
 112                              "%s:client core is NOT in service(%d).\n",
 113                              __func__,
 114                              is_daemon_in_service());
 115                 op->attempts++;
 116         }
 117
 118         /* queue up the operation */
 119         if (flags & ORANGEFS_OP_PRIORITY) {
 120                 add_priority_op_to_request_list(op);
 121         } else {
 122                 gossip_debug(GOSSIP_WAIT_DEBUG,
 123                              "%s:About to call add_op_to_request_list().\n",
 124                              __func__);
 125                 add_op_to_request_list(op);
 126         }
 127
 128         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
 129                 mutex_unlock(&request_mutex);
 130
 131         /*
 132          * If we are asked to service an asynchronous operation from
 133          * VFS perspective, we are done.
 134          */
 135         if (flags & ORANGEFS_OP_ASYNC)
 136                 return 0;
 137
 138         if (flags & ORANGEFS_OP_CANCELLATION) {
 139                 gossip_debug(GOSSIP_WAIT_DEBUG,
 140                              "%s:"
 141                              "About to call wait_for_cancellation_downcall.\n",
 142                              __func__);
 143                 ret = wait_for_cancellation_downcall(op);
 144         } else {
 145                 ret = wait_for_matching_downcall(op);
 146         }
 147
 148         if (ret < 0) {
 149                 /* failed to get matching downcall */
 150                 if (ret == -ETIMEDOUT) {
 151                         gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
 152                                    op_name);
 153                 }
 154                 op->downcall.status = ret;
 155         } else {
 156                 /* got matching downcall; make sure status is in errno format */
 157                 op->downcall.status =
 158                     orangefs_normalize_to_errno(op->downcall.status);
 159                 ret = op->downcall.status;
 160         }
 161
 162         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 163                 set_signals(&orig_sigset);
 164
 165         BUG_ON(ret != op->downcall.status);
 166         /* retry if operation has not been serviced and if requested */
 167         if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
 168                 gossip_debug(GOSSIP_WAIT_DEBUG,
 169                              "orangefs: tag %llu (%s)"
 170                              " -- operation to be retried (%d attempt)\n",
 171                              llu(op->tag),
 172                              op_name,
 173                              op->attempts + 1);
 174
 175                 if (!op->uses_shared_memory)
 176                         /*
 177                          * this operation doesn't use the shared memory
 178                          * system
 179                          */
 180                         goto retry_servicing;
 181
 182                 /* op uses shared memory */
 183                 if (get_bufmap_init() == 0) {
 184                         /*
 185                          * This operation uses the shared memory system AND
 186                          * the system is not yet ready. This situation occurs
 187                          * when the client-core is restarted AND there were
 188                          * operations waiting to be processed or were already
 189                          * in process.
 190                          */
 191                         gossip_debug(GOSSIP_WAIT_DEBUG,
 192                                      "uses_shared_memory is true.\n");
 193                         gossip_debug(GOSSIP_WAIT_DEBUG,
 194                                      "Client core in-service status(%d).\n",
 195                                      is_daemon_in_service());
 196                         gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
 197                                      get_bufmap_init());
 198                         gossip_debug(GOSSIP_WAIT_DEBUG,
 199                                      "operation's status is 0x%0x.\n",
 200                                      op->op_state);
 201
 202                         /*
 203                          * let process sleep for a few seconds so shared
 204                          * memory system can be initialized.
 205                          */
 206                         spin_lock_irqsave(&op->lock, irqflags);
 207                         add_wait_queue(&orangefs_bufmap_init_waitq, &wait_entry);
 208                         spin_unlock_irqrestore(&op->lock, irqflags);
 209
 210                         set_current_state(TASK_INTERRUPTIBLE);
 211
 212                         /*
 213                          * Wait for orangefs_bufmap_initialize() to wake me up
 214                          * within the allotted time.
 215                          */
 216                         ret = schedule_timeout(MSECS_TO_JIFFIES
 217                                 (1000 * ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS));
 218
 219                         gossip_debug(GOSSIP_WAIT_DEBUG,
 220                                      "Value returned from schedule_timeout:"
 221                                      "%d.\n",
 222                                      ret);
 223                         gossip_debug(GOSSIP_WAIT_DEBUG,
 224                                      "Is shared memory available? (%d).\n",
 225                                      get_bufmap_init());
 226
 227                         spin_lock_irqsave(&op->lock, irqflags);
 228                         remove_wait_queue(&orangefs_bufmap_init_waitq,
 229                                           &wait_entry);
 230                         spin_unlock_irqrestore(&op->lock, irqflags);
 231
 232                         if (get_bufmap_init() == 0) {
 233                                 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
 234                                            __func__,
 235                                            ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
 236                                            get_opname_string(op));
 237                                 return -EIO;
 238                         }
 239
 240                         /*
 241                          * Return to the calling function and re-populate a
 242                          * shared memory buffer.
 243                          */
 244                         return -EAGAIN;
 245                 }
 246         }
 247
 248         gossip_debug(GOSSIP_WAIT_DEBUG,
 249                      "orangefs: service_operation %s returning: %d for %p.\n",
 250                      op_name,
 251                      ret,
 252                      op);
 253         return ret;
 254 }
 255
 256 void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
 257 {
 258         /*
 259          * handle interrupted cases depending on what state we were in when
 260          * the interruption is detected.  there is a coarse grained lock
 261          * across the operation.
 262          *
 263          * NOTE: be sure not to reverse lock ordering by locking an op lock
 264          * while holding the request_list lock.  Here, we first lock the op
 265          * and then lock the appropriate list.
 266          */
 267         if (!op) {
 268                 gossip_debug(GOSSIP_WAIT_DEBUG,
 269                             "%s: op is null, ignoring\n",
 270                              __func__);
 271                 return;
 272         }
 273
 274         /*
 275          * one more sanity check, make sure it's in one of the possible states
 276          * or don't try to cancel it
 277          */
 278         if (!(op_state_waiting(op) ||
 279               op_state_in_progress(op) ||
 280               op_state_serviced(op) ||
 281               op_state_purged(op))) {
 282                 gossip_debug(GOSSIP_WAIT_DEBUG,
 283                              "%s: op %p not in a valid state (%0x), "
 284                              "ignoring\n",
 285                              __func__,
 286                              op,
 287                              op->op_state);
 288                 return;
 289         }
 290
 291         spin_lock(&op->lock);
 292
 293         if (op_state_waiting(op)) {
 294                 /*
 295                  * upcall hasn't been read; remove op from upcall request
 296                  * list.
 297                  */
 298                 spin_unlock(&op->lock);
 299                 remove_op_from_request_list(op);
 300                 gossip_debug(GOSSIP_WAIT_DEBUG,
 301                              "Interrupted: Removed op %p from request_list\n",
 302                              op);
 303         } else if (op_state_in_progress(op)) {
 304                 /* op must be removed from the in progress htable */
 305                 spin_unlock(&op->lock);
 306                 spin_lock(&htable_ops_in_progress_lock);
 307                 list_del(&op->list);
 308                 spin_unlock(&htable_ops_in_progress_lock);
 309                 gossip_debug(GOSSIP_WAIT_DEBUG,
 310                              "Interrupted: Removed op %p"
 311                              " from htable_ops_in_progress\n",
 312                              op);
 313         } else if (!op_state_serviced(op)) {
 314                 spin_unlock(&op->lock);
 315                 gossip_err("interrupted operation is in a weird state 0x%x\n",
 316                            op->op_state);
 317         } else {
 318                 /*
 319                  * It is not intended for execution to flow here,
 320                  * but having this unlock here makes sparse happy.
 321                  */
 322                 gossip_err("%s: can't get here.\n", __func__);
 323                 spin_unlock(&op->lock);
 324         }
 325 }
 326
 327 /*
 328  * sleeps on waitqueue waiting for matching downcall.
 329  * if client-core finishes servicing, then we are good to go.
 330  * else if client-core exits, we get woken up here, and retry with a timeout
 331  *
 332  * Post when this call returns to the caller, the specified op will no
 333  * longer be on any list or htable.
 334  *
 335  * Returns 0 on success and -errno on failure
 336  * Errors are:
 337  * EAGAIN in case we want the caller to requeue and try again..
 338  * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
 339  * operation since client-core seems to be exiting too often
 340  * or if we were interrupted.
 341  */
 342 int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 343 {
 344         int ret = -EINVAL;
 345         DECLARE_WAITQUEUE(wait_entry, current);
 346
 347         spin_lock(&op->lock);
 348         add_wait_queue(&op->waitq, &wait_entry);
 349         spin_unlock(&op->lock);
 350
 351         while (1) {
 352                 set_current_state(TASK_INTERRUPTIBLE);
 353
 354                 spin_lock(&op->lock);
 355                 if (op_state_serviced(op)) {
 356                         spin_unlock(&op->lock);
 357                         ret = 0;
 358                         break;
 359                 }
 360                 spin_unlock(&op->lock);
 361
 362                 if (!signal_pending(current)) {
 363                         /*
 364                          * if this was our first attempt and client-core
 365                          * has not purged our operation, we are happy to
 366                          * simply wait
 367                          */
 368                         spin_lock(&op->lock);
 369                         if (op->attempts == 0 && !op_state_purged(op)) {
 370                                 spin_unlock(&op->lock);
 371                                 schedule();
 372                         } else {
 373                                 spin_unlock(&op->lock);
 374                                 /*
 375                                  * subsequent attempts, we retry exactly once
 376                                  * with timeouts
 377                                  */
 378                                 if (!schedule_timeout(MSECS_TO_JIFFIES
 379                                       (1000 * op_timeout_secs))) {
 380                                         gossip_debug(GOSSIP_WAIT_DEBUG,
 381                                                      "*** %s:"
 382                                                      " operation timed out (tag"
 383                                                      " %llu, %p, att %d)\n",
 384                                                      __func__,
 385                                                      llu(op->tag),
 386                                                      op,
 387                                                      op->attempts);
 388                                         ret = -ETIMEDOUT;
 389                                         orangefs_clean_up_interrupted_operation
 390                                             (op);
 391                                         break;
 392                                 }
 393                         }
 394                         spin_lock(&op->lock);
 395                         op->attempts++;
 396                         /*
 397                          * if the operation was purged in the meantime, it
 398                          * is better to requeue it afresh but ensure that
 399                          * we have not been purged repeatedly. This could
 400                          * happen if client-core crashes when an op
 401                          * is being serviced, so we requeue the op, client
 402                          * core crashes again so we requeue the op, client
 403                          * core starts, and so on...
 404                          */
 405                         if (op_state_purged(op)) {
 406                                 ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
 407                                          -EAGAIN :
 408                                          -EIO;
 409                                 spin_unlock(&op->lock);
 410                                 gossip_debug(GOSSIP_WAIT_DEBUG,
 411                                              "*** %s:"
 412                                              " operation purged (tag "
 413                                              "%llu, %p, att %d)\n",
 414                                              __func__,
 415                                              llu(op->tag),
 416                                              op,
 417                                              op->attempts);
 418                                 orangefs_clean_up_interrupted_operation(op);
 419                                 break;
 420                         }
 421                         spin_unlock(&op->lock);
 422                         continue;
 423                 }
 424
 425                 gossip_debug(GOSSIP_WAIT_DEBUG,
 426                              "*** %s:"
 427                              " operation interrupted by a signal (tag "
 428                              "%llu, op %p)\n",
 429                              __func__,
 430                              llu(op->tag),
 431                              op);
 432                 orangefs_clean_up_interrupted_operation(op);
 433                 ret = -EINTR;
 434                 break;
 435         }
 436
 437         set_current_state(TASK_RUNNING);
 438
 439         spin_lock(&op->lock);
 440         remove_wait_queue(&op->waitq, &wait_entry);
 441         spin_unlock(&op->lock);
 442
 443         return ret;
 444 }
 445
 446 /*
 447  * similar to wait_for_matching_downcall(), but used in the special case
 448  * of I/O cancellations.
 449  *
 450  * Note we need a special wait function because if this is called we already
 451  *      know that a signal is pending in current and need to service the
 452  *      cancellation upcall anyway.  the only way to exit this is to either
 453  *      timeout or have the cancellation be serviced properly.
 454  */
 455 int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
 456 {
 457         int ret = -EINVAL;
 458         DECLARE_WAITQUEUE(wait_entry, current);
 459
 460         spin_lock(&op->lock);
 461         add_wait_queue(&op->waitq, &wait_entry);
 462         spin_unlock(&op->lock);
 463
 464         while (1) {
 465                 set_current_state(TASK_INTERRUPTIBLE);
 466
 467                 spin_lock(&op->lock);
 468                 if (op_state_serviced(op)) {
 469                         gossip_debug(GOSSIP_WAIT_DEBUG,
 470                                      "%s:op-state is SERVICED.\n",
 471                                      __func__);
 472                         spin_unlock(&op->lock);
 473                         ret = 0;
 474                         break;
 475                 }
 476                 spin_unlock(&op->lock);
 477
 478                 if (signal_pending(current)) {
 479                         gossip_debug(GOSSIP_WAIT_DEBUG,
 480                                      "%s:operation interrupted by a signal (tag"
 481                                      " %llu, op %p)\n",
 482                                      __func__,
 483                                      llu(op->tag),
 484                                      op);
 485                         orangefs_clean_up_interrupted_operation(op);
 486                         ret = -EINTR;
 487                         break;
 488                 }
 489
 490                 gossip_debug(GOSSIP_WAIT_DEBUG,
 491                              "%s:About to call schedule_timeout.\n",
 492                              __func__);
 493                 ret =
 494                     schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
 495
 496                 gossip_debug(GOSSIP_WAIT_DEBUG,
 497                              "%s:Value returned from schedule_timeout(%d).\n",
 498                              __func__,
 499                              ret);
 500                 if (!ret) {
 501                         gossip_debug(GOSSIP_WAIT_DEBUG,
 502                                      "%s:*** operation timed out: %p\n",
 503                                      __func__,
 504                                      op);
 505                         orangefs_clean_up_interrupted_operation(op);
 506                         ret = -ETIMEDOUT;
 507                         break;
 508                 }
 509
 510                 gossip_debug(GOSSIP_WAIT_DEBUG,
 511                              "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
 512                              __func__);
 513                 ret = -ETIMEDOUT;
 514                 break;
 515         }
 516
 517         set_current_state(TASK_RUNNING);
 518
 519         spin_lock(&op->lock);
 520         remove_wait_queue(&op->waitq, &wait_entry);
 521         spin_unlock(&op->lock);
 522
 523         gossip_debug(GOSSIP_WAIT_DEBUG,
 524                      "%s:returning ret(%d)\n",
 525                      __func__,
 526                      ret);
 527
 528         return ret;
 529 }