Merge branch 'for-2.6.37' of git://linux-nfs.org/~bfields/linux
net/sunrpc/cache.c
index da872f9fe1e06e9f81de17019591ea0b373401e2..e433e7580e27b221eff94d8b95ceaea703440618 100644 (file)
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <asm/ioctls.h>
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
+#include "netns.h"
 
 #define         RPCDBG_FACILITY RPCDBG_CACHE
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item);
+static void cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
 
 static void cache_init(struct cache_head *h)
@@ -268,7 +268,8 @@ int cache_check(struct cache_detail *detail,
        }
 
        if (rv == -EAGAIN) {
-               if (cache_defer_req(rqstp, h) < 0) {
+               cache_defer_req(rqstp, h);
+               if (!test_bit(CACHE_PENDING, &h->flags)) {
                        /* Request is not deferred */
                        rv = cache_is_valid(detail, h);
                        if (rv == -EAGAIN)
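
cache_defer_req() no longer reports success or failure through a return code; the caller infers the outcome by re-reading CACHE_PENDING, which the answer path clears. A minimal userspace model of that contract (illustrative names, C11 stdatomic standing in for the kernel's test_bit(); not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
        atomic_bool pending;            /* stands in for CACHE_PENDING */
};

/* Park the request for later revisiting; may race with the answer. */
static void defer_req(struct item *it)
{
        /* ... queue on a deferral list ... */
}

int main(void)
{
        struct item it;

        atomic_init(&it.pending, true);
        defer_req(&it);
        if (!atomic_load(&it.pending))
                puts("answered during deferral: re-validate, don't drop");
        else
                puts("deferred: the request will be revisited later");
        return 0;
}
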
@@ -506,29 +507,30 @@ EXPORT_SYMBOL_GPL(cache_purge);
 
 static DEFINE_SPINLOCK(cache_defer_lock);
 static LIST_HEAD(cache_defer_list);
-static struct list_head cache_defer_hash[DFR_HASHSIZE];
+static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
-       list_del_init(&dreq->recent);
-       list_del_init(&dreq->hash);
-       cache_defer_cnt--;
+       hlist_del_init(&dreq->hash);
+       if (!list_empty(&dreq->recent)) {
+               list_del_init(&dreq->recent);
+               cache_defer_cnt--;
+       }
 }
 
 static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
 {
        int hash = DFR_HASH(item);
 
-       list_add(&dreq->recent, &cache_defer_list);
-       if (cache_defer_hash[hash].next == NULL)
-               INIT_LIST_HEAD(&cache_defer_hash[hash]);
-       list_add(&dreq->hash, &cache_defer_hash[hash]);
+       INIT_LIST_HEAD(&dreq->recent);
+       hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
 }
 
-static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
+static void setup_deferral(struct cache_deferred_req *dreq,
+                          struct cache_head *item,
+                          int count_me)
 {
-       struct cache_deferred_req *discard;
 
        dreq->item = item;
 
@@ -536,25 +538,13 @@ static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *it
 
        __hash_deferred_req(dreq, item);
 
-       /* it is in, now maybe clean up */
-       discard = NULL;
-       if (++cache_defer_cnt > DFR_MAX) {
-               discard = list_entry(cache_defer_list.prev,
-                                    struct cache_deferred_req, recent);
-               __unhash_deferred_req(discard);
+       if (count_me) {
+               cache_defer_cnt++;
+               list_add(&dreq->recent, &cache_defer_list);
        }
-       spin_unlock(&cache_defer_lock);
 
-       if (discard)
-               /* there was one too many */
-               discard->revisit(discard, 1);
+       spin_unlock(&cache_defer_lock);
 
-       if (!test_bit(CACHE_PENDING, &item->flags)) {
-               /* must have just been validated... */
-               cache_revisit_request(item);
-               return -EAGAIN;
-       }
-       return 0;
 }
 
 struct thread_deferred_req {
@@ -569,26 +559,24 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
        complete(&dr->completion);
 }
 
-static int cache_wait_req(struct cache_req *req, struct cache_head *item)
+static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 {
        struct thread_deferred_req sleeper;
        struct cache_deferred_req *dreq = &sleeper.handle;
-       int ret;
 
        sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
        dreq->revisit = cache_restart_thread;
 
-       ret = setup_deferral(dreq, item);
-       if (ret)
-               return ret;
+       setup_deferral(dreq, item, 0);
 
-       if (wait_for_completion_interruptible_timeout(
+       if (!test_bit(CACHE_PENDING, &item->flags) ||
+           wait_for_completion_interruptible_timeout(
                    &sleeper.completion, req->thread_wait) <= 0) {
                /* The completion wasn't completed, so we need
                 * to clean up
                 */
                spin_lock(&cache_defer_lock);
-               if (!list_empty(&sleeper.handle.hash)) {
+               if (!hlist_unhashed(&sleeper.handle.hash)) {
                        __unhash_deferred_req(&sleeper.handle);
                        spin_unlock(&cache_defer_lock);
                } else {
@@ -601,65 +589,73 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item)
                        wait_for_completion(&sleeper.completion);
                }
        }
-       if (test_bit(CACHE_PENDING, &item->flags)) {
-               /* item is still pending, try request
-                * deferral
-                */
-               return -ETIMEDOUT;
-       }
-       /* only return success if we actually deferred the
-        * request.  In this case we waited until it was
-        * answered so no deferral has happened - rather
-        * an answer already exists.
+}
+
+static void cache_limit_defers(void)
+{
+       /* Make sure we haven't exceeded the limit of allowed deferred
+        * requests.
         */
-       return -EEXIST;
+       struct cache_deferred_req *discard = NULL;
+
+       if (cache_defer_cnt <= DFR_MAX)
+               return;
+
+       spin_lock(&cache_defer_lock);
+
+       /* Consider removing either the first or the last */
+       if (cache_defer_cnt > DFR_MAX) {
+               if (net_random() & 1)
+                       discard = list_entry(cache_defer_list.next,
+                                            struct cache_deferred_req, recent);
+               else
+                       discard = list_entry(cache_defer_list.prev,
+                                            struct cache_deferred_req, recent);
+               __unhash_deferred_req(discard);
+       }
+       spin_unlock(&cache_defer_lock);
+       if (discard)
+               discard->revisit(discard, 1);
 }
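
cache_limit_defers() above replaces the pruning that used to be inlined in setup_deferral(). The coin flip between the newest and the oldest victim means neither a burst of fresh deferrals nor a backlog of stale ones can monopolize the DFR_MAX slots. A compact model of the pick (assumed simplified types; rand() in place of net_random()):

#include <stdlib.h>

/*
 * Circular doubly-linked list with a dummy head, as the kernel uses:
 * list_add() inserts at head->next, so .next is the newest entry and
 * .prev the oldest.  The caller must ensure the list is non-empty,
 * which the cache_defer_cnt > DFR_MAX check above guarantees.
 */
struct dreq {
        struct dreq *prev, *next;
};

static struct dreq *pick_victim(struct dreq *head)
{
        return (rand() & 1) ? head->next        /* drop the newest */
                            : head->prev;       /* drop the oldest */
}
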
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
        struct cache_deferred_req *dreq;
-       int ret;
 
-       if (cache_defer_cnt >= DFR_MAX) {
-               /* too much in the cache, randomly drop this one,
-                * or continue and drop the oldest
-                */
-               if (net_random()&1)
-                       return -ENOMEM;
-       }
        if (req->thread_wait) {
-               ret = cache_wait_req(req, item);
-               if (ret != -ETIMEDOUT)
-                       return ret;
+               cache_wait_req(req, item);
+               if (!test_bit(CACHE_PENDING, &item->flags))
+                       return;
        }
        dreq = req->defer(req);
        if (dreq == NULL)
-               return -ENOMEM;
-       return setup_deferral(dreq, item);
+               return;
+       setup_deferral(dreq, item, 1);
+       if (!test_bit(CACHE_PENDING, &item->flags))
+               /* Bit could have been cleared before we managed to
+                * set up the deferral, so need to revisit just in case
+                */
+               cache_revisit_request(item);
+
+       cache_limit_defers();
 }
 
 static void cache_revisit_request(struct cache_head *item)
 {
        struct cache_deferred_req *dreq;
        struct list_head pending;
-
-       struct list_head *lp;
+       struct hlist_node *lp, *tmp;
        int hash = DFR_HASH(item);
 
        INIT_LIST_HEAD(&pending);
        spin_lock(&cache_defer_lock);
 
-       lp = cache_defer_hash[hash].next;
-       if (lp) {
-               while (lp != &cache_defer_hash[hash]) {
-                       dreq = list_entry(lp, struct cache_deferred_req, hash);
-                       lp = lp->next;
-                       if (dreq->item == item) {
-                               __unhash_deferred_req(dreq);
-                               list_add(&dreq->recent, &pending);
-                       }
+       hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
+               if (dreq->item == item) {
+                       __unhash_deferred_req(dreq);
+                       list_add(&dreq->recent, &pending);
                }
-       }
+
        spin_unlock(&cache_defer_lock);
 
        while (!list_empty(&pending)) {
@@ -679,8 +675,10 @@ void cache_clean_deferred(void *owner)
        spin_lock(&cache_defer_lock);
 
        list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
-               if (dreq->owner == owner)
+               if (dreq->owner == owner) {
                        __unhash_deferred_req(dreq);
+                       list_add(&dreq->recent, &pending);
+               }
        }
        spin_unlock(&cache_defer_lock);
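
Both cache_revisit_request() and the cache_clean_deferred() fix above follow the same lock-scope pattern: unhash every matching entry onto a private pending list while cache_defer_lock is held, then run the revisit callbacks only after the lock is dropped, so a callback is free to sleep or to re-enter the deferral code. A userspace miniature of the pattern (pthread mutex standing in for the spinlock; types are illustrative):

#include <pthread.h>
#include <stddef.h>

struct entry {
        struct entry *next;
        void *key;
        void (*revisit)(struct entry *e);
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void revisit_matching(struct entry **bucket, void *key)
{
        struct entry *pending = NULL, **pp, *e;

        pthread_mutex_lock(&lock);
        for (pp = bucket; (e = *pp) != NULL; ) {
                if (e->key == key) {
                        *pp = e->next;          /* unhash under the lock */
                        e->next = pending;      /* collect privately */
                        pending = e;
                } else {
                        pp = &e->next;
                }
        }
        pthread_mutex_unlock(&lock);

        while ((e = pending) != NULL) {         /* lock dropped: may sleep */
                pending = e->next;
                e->revisit(e);
        }
}
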
 
@@ -1091,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
        }
 }
 
+static bool cache_listeners_exist(struct cache_detail *detail)
+{
+       if (atomic_read(&detail->readers))
+               return true;
+       if (detail->last_close == 0)
+               /* This cache was never opened */
+               return false;
+       if (detail->last_close < seconds_since_boot() - 30)
+               /*
+                * We allow for the possibility that someone might
+                * restart a userspace daemon without restarting the
+                * server; but after 30 seconds, we give up.
+                */
+                return false;
+       return true;
+}
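
cache_listeners_exist() folds the old open-coded test into one helper and adds the never-opened case. Its decision table, modeled in plain C (field names mirror the kernel struct above; now replaces seconds_since_boot()):

#include <stdbool.h>
#include <time.h>

struct detail {
        int readers;            /* daemon readers currently attached */
        time_t last_close;      /* 0 if the channel was never opened */
};

static bool listeners_exist(const struct detail *d, time_t now)
{
        if (d->readers)
                return true;            /* a reader is attached right now */
        if (d->last_close == 0)
                return false;           /* never opened: no daemon expected */
        return d->last_close >= now - 30; /* 30s grace for a daemon restart */
}
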
+
 /*
  * register an upcall request to user-space and queue it up for read() by the
  * upcall daemon.
@@ -1109,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
        char *bp;
        int len;
 
-       if (atomic_read(&detail->readers) == 0 &&
-           detail->last_close < seconds_since_boot() - 30) {
-                       warn_no_listener(detail);
-                       return -EINVAL;
+       if (!cache_listeners_exist(detail)) {
+               warn_no_listener(detail);
+               return -EINVAL;
        }
 
        buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1171,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
        if (bp[0] == '\\' && bp[1] == 'x') {
                /* HEX STRING */
                bp += 2;
-               while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
-                       int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
-                       bp++;
-                       byte <<= 4;
-                       byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
-                       *dest++ = byte;
-                       bp++;
+               while (len < bufsize) {
+                       int h, l;
+
+                       h = hex_to_bin(bp[0]);
+                       if (h < 0)
+                               break;
+
+                       l = hex_to_bin(bp[1]);
+                       if (l < 0)
+                               break;
+
+                       *dest++ = (h << 4) | l;
+                       bp += 2;
                        len++;
                }
        } else {
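
The rewritten loop drops the hand-rolled isxdigit()/toupper() arithmetic in favour of hex_to_bin(), which returns -1 for a non-hex character and so collapses three exit conditions into one check per nibble. A standalone version that can be compiled and run (the userspace hex_to_bin() below mirrors the kernel helper's contract):

#include <ctype.h>
#include <stdio.h>

static int hex_to_bin(char ch)          /* kernel contract: -1 on error */
{
        if (ch >= '0' && ch <= '9')
                return ch - '0';
        ch = (char)tolower((unsigned char)ch);
        if (ch >= 'a' && ch <= 'f')
                return ch - 'a' + 10;
        return -1;
}

int main(void)
{
        const char *bp = "6e667364";    /* hex encoding of "nfsd" */
        char dest[16];
        int len = 0;

        while (len < (int)sizeof(dest) - 1) {
                int h, l;

                h = hex_to_bin(bp[0]);
                if (h < 0)              /* stop at the first non-hex byte */
                        break;
                l = hex_to_bin(bp[1]);
                if (l < 0)
                        break;
                dest[len++] = (char)((h << 4) | l);
                bp += 2;
        }
        dest[len] = '\0';
        printf("%s\n", dest);           /* prints: nfsd */
        return 0;
}
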
@@ -1426,15 +1446,10 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
 static long cache_ioctl_procfs(struct file *filp,
                               unsigned int cmd, unsigned long arg)
 {
-       long ret;
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct cache_detail *cd = PDE(inode)->data;
 
-       lock_kernel();
-       ret = cache_ioctl(inode, filp, cmd, arg, cd);
-       unlock_kernel();
-
-       return ret;
+       return cache_ioctl(inode, filp, cmd, arg, cd);
 }
 
 static int cache_open_procfs(struct inode *inode, struct file *filp)
@@ -1519,10 +1534,13 @@ static const struct file_operations cache_flush_operations_procfs = {
        .read           = read_flush_procfs,
        .write          = write_flush_procfs,
        .release        = release_flush_procfs,
+       .llseek         = no_llseek,
 };
 
-static void remove_cache_proc_entries(struct cache_detail *cd)
+static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
+       struct sunrpc_net *sn;
+
        if (cd->u.procfs.proc_ent == NULL)
                return;
        if (cd->u.procfs.flush_ent)
@@ -1532,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
        if (cd->u.procfs.content_ent)
                remove_proc_entry("content", cd->u.procfs.proc_ent);
        cd->u.procfs.proc_ent = NULL;
-       remove_proc_entry(cd->name, proc_net_rpc);
+       sn = net_generic(net, sunrpc_net_id);
+       remove_proc_entry(cd->name, sn->proc_net_rpc);
 }
 
 #ifdef CONFIG_PROC_FS
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
        struct proc_dir_entry *p;
+       struct sunrpc_net *sn;
 
-       cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc);
+       sn = net_generic(net, sunrpc_net_id);
+       cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
        if (cd->u.procfs.proc_ent == NULL)
                goto out_nomem;
        cd->u.procfs.channel_ent = NULL;
@@ -1571,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
        }
        return 0;
 out_nomem:
-       remove_cache_proc_entries(cd);
+       remove_cache_proc_entries(cd, net);
        return -ENOMEM;
 }
 #else /* CONFIG_PROC_FS */
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
        return 0;
 }
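
create_cache_proc_entries() and remove_cache_proc_entries() now reach their proc directory through net_generic(net, sunrpc_net_id) rather than the global proc_net_rpc, giving each network namespace its own /proc/net/rpc tree. The lookup pattern in miniature (an illustrative userspace model; the real API lives in <net/netns/generic.h>):

#include <stdio.h>

#define MAX_SUBSYS 8

struct net {
        void *gen[MAX_SUBSYS];          /* one private slot per subsystem */
};

struct sunrpc_net {
        const char *proc_net_rpc;       /* this namespace's /proc/net/rpc */
};

static const int sunrpc_net_id = 3;     /* id assigned at pernet registration */

static void *net_generic(struct net *net, int id)
{
        return net->gen[id];
}

int main(void)
{
        struct sunrpc_net sn = { "rpc dir for netns A" };
        struct net net_a = { .gen = { [3] = &sn } };    /* slot 3 == sunrpc_net_id */
        struct sunrpc_net *p = net_generic(&net_a, sunrpc_net_id);

        printf("%s\n", p->proc_net_rpc);
        return 0;
}
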
@@ -1586,23 +1607,33 @@ void __init cache_initialize(void)
        INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
 }
 
-int cache_register(struct cache_detail *cd)
+int cache_register_net(struct cache_detail *cd, struct net *net)
 {
        int ret;
 
        sunrpc_init_cache_detail(cd);
-       ret = create_cache_proc_entries(cd);
+       ret = create_cache_proc_entries(cd, net);
        if (ret)
                sunrpc_destroy_cache_detail(cd);
        return ret;
 }
+
+int cache_register(struct cache_detail *cd)
+{
+       return cache_register_net(cd, &init_net);
+}
 EXPORT_SYMBOL_GPL(cache_register);
 
-void cache_unregister(struct cache_detail *cd)
+void cache_unregister_net(struct cache_detail *cd, struct net *net)
 {
-       remove_cache_proc_entries(cd);
+       remove_cache_proc_entries(cd, net);
        sunrpc_destroy_cache_detail(cd);
 }
+
+void cache_unregister(struct cache_detail *cd)
+{
+       cache_unregister_net(cd, &init_net);
+}
 EXPORT_SYMBOL_GPL(cache_unregister);
 
 static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
@@ -1633,13 +1664,8 @@ static long cache_ioctl_pipefs(struct file *filp,
 {
        struct inode *inode = filp->f_dentry->d_inode;
        struct cache_detail *cd = RPC_I(inode)->private;
-       long ret;
-
-       lock_kernel();
-       ret = cache_ioctl(inode, filp, cmd, arg, cd);
-       unlock_kernel();
 
-       return ret;
+       return cache_ioctl(inode, filp, cmd, arg, cd);
 }
 
 static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1724,6 +1750,7 @@ const struct file_operations cache_flush_operations_pipefs = {
        .read           = read_flush_pipefs,
        .write          = write_flush_pipefs,
        .release        = release_flush_pipefs,
+       .llseek         = no_llseek,
 };
 
 int sunrpc_cache_register_pipefs(struct dentry *parent,