#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters. To start
 * with, we don't try to detect the ref hitting 0 - which means that get/put
 * can just increment or decrement the local counter. Note that the counter
 * on a particular cpu can (and will) wrap - this is fine; when we go to
 * shutdown, the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative, the sum of
 * all the percpu_count vars will be equal to what it would have been if all
 * the gets and puts were done to a single integer, even if some of the
 * percpu integers overflow or underflow.)
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't
 * detect the ref hitting 0 on every put - this would require global
 * synchronization and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we
 * know the ref can't hit 0 before the user drops the initial ref, so as
 * long as we convert to non-percpu mode before the initial ref is dropped,
 * everything works.
 *
 * Converting to non-percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
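
/*
 * To make the modular-arithmetic point concrete, a hypothetical userspace
 * sketch (not part of this file), using 8-bit counters so the wraparound
 * is easy to see:
 *
 *	unsigned char cpu0 = 0, cpu1 = 0;	// two "percpu" counters
 *	int i;
 *
 *	for (i = 0; i < 300; i++)	// 300 gets on cpu0; the counter
 *		cpu0++;			// wraps: cpu0 == 300 % 256 == 44
 *	for (i = 0; i < 298; i++)	// 298 puts on cpu1; it underflows:
 *		cpu1--;			// cpu1 == -298 mod 256 == 214
 *
 *	// Summing in the same modular domain recovers the true count:
 *	// (unsigned char)(cpu0 + cpu1) == (44 + 214) % 256 == 2 == 300 - 298
 */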

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
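
/*
 * A worked example of why the bias is needed (illustrative numbers): on a
 * 64-bit machine the atomic count starts at 2^63 + 1 after
 * percpu_ref_init(). If users have since done, say, three gets and two
 * puts via the percpu counters, the percpu sum is 1, and
 * percpu_ref_kill_rcu() below adds (1 - 2^63), leaving 2: the initial ref
 * plus one outstanding user ref. The bias keeps the atomic count far away
 * from 0 until that single add has folded in the whole percpu sum.
 */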

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @gfp: allocation mask to use
 *
 * Initializes the refcount in single atomic counter mode with a refcount of 1;
 * analogous to atomic_long_set(ref, 1).
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    gfp_t gfp)
{
	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);

	ref->percpu_count_ptr =
		(unsigned long)alloc_percpu_gfp(unsigned long, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	ref->release = release;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
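
/*
 * A minimal usage sketch (struct foo, foo_release() and foo_create() are
 * hypothetical, for illustration only):
 *
 *	struct foo {
 *		struct percpu_ref ref;
 *		// ... other fields ...
 *	};
 *
 *	static void foo_release(struct percpu_ref *ref);	// see below
 *
 *	static int foo_create(struct foo *foo)
 *	{
 *		int ret = percpu_ref_init(&foo->ref, foo_release, GFP_KERNEL);
 *
 *		if (ret)
 *			return ret;
 *		// ... rest of initialization ...
 *		return 0;
 *	}
 */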

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
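
/*
 * Continuing the hypothetical sketch above, the @ref->release() callback
 * is the usual place to call percpu_ref_exit():
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		percpu_ref_exit(&foo->ref);
 *		kfree(foo);	// release() may not sleep; kfree() is fine
 *	}
 */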

static void percpu_ref_kill_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %ld percpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
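	/*
	 * E.g. (illustrative numbers, not from this file): with a bias of
	 * 2^63, an atomic count of 2^63 + 1 and a percpu sum of 1, the
	 * single atomic op below yields 2^63 + 1 + (1 - 2^63) = 2.
	 */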
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%pf) <= 0 (%ld) after killed",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
	if (ref->confirm_switch)
		ref->confirm_switch(ref);

	/*
	 * Now we're in single atomic_long_t mode with a consistent
	 * refcount, so it's safe to drop our initial ref:
	 */
	percpu_ref_put(ref);
}

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs - all further
 * invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for more details.
 *
 * Due to the way percpu_ref is implemented, @confirm_kill will be called
 * after at least one full RCU grace period has passed but this is an
 * implementation detail and callers must not depend on it.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC,
		  "%s called more than once on %pf!", __func__, ref->release);

	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
	ref->confirm_switch = confirm_kill;

	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
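
/*
 * A usage sketch (hypothetical names; plain percpu_ref_kill() is this
 * function with a NULL @confirm_kill):
 *
 *	static void foo_confirm_kill(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		// By now no CPU can succeed in percpu_ref_tryget_live().
 *		complete(&foo->confirm_done);	// complete() doesn't block
 *	}
 *
 *	percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 *	wait_for_completion(&foo->confirm_done);
 */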

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init(). @ref must have been initialized successfully, killed
 * and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	atomic_long_set(&ref->count, 1 + PERCPU_COUNT_BIAS);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired with
	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
	 * that the zeroing is visible to all percpu accesses which can see
	 * the following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);
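
/*
 * A sketch of a full kill/reinit cycle (hypothetical caller; assumes the
 * release callback merely signals a completion instead of freeing @ref,
 * since @ref must not be exited before percpu_ref_reinit()):
 *
 *	percpu_ref_kill(&foo->ref);
 *	wait_for_completion(&foo->ref_is_zero);	// killed and reached 0
 *	percpu_ref_reinit(&foo->ref);		// percpu mode, refcount 1
 */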