bcache: A block layer cache
[deliverable/linux.git] / drivers / md / bcache / closure.h
1 #ifndef _LINUX_CLOSURE_H
2 #define _LINUX_CLOSURE_H
3
4 #include <linux/llist.h>
5 #include <linux/sched.h>
6 #include <linux/workqueue.h>
7
8 /*
9 * Closure is perhaps the most overused and abused term in computer science, but
10 * since I've been unable to come up with anything better you're stuck with it
11 * again.
12 *
13 * What are closures?
14 *
15 * They embed a refcount. The basic idea is they count "things that are in
16 * progress" - in flight bios, some other thread that's doing something else -
17 * anything you might want to wait on.
18 *
19 * The refcount may be manipulated with closure_get() and closure_put().
20 * closure_put() is where many of the interesting things happen, when it causes
21 * the refcount to go to 0.
22 *
23 * Closures can be used to wait on things both synchronously and asynchronously,
24 * and synchronous and asynchronous use can be mixed without restriction. To
25 * wait synchronously, use closure_sync() - you will sleep until your closure's
26 * refcount hits 1.
27 *
28 * To wait asynchronously, use
29 * continue_at(cl, next_function, workqueue);
30 *
31 * passing it, as you might expect, the function to run when nothing is pending
32 * and the workqueue to run that function out of.
33 *
34 * continue_at() also, critically, is a macro that returns the calling function.
35 * There's good reason for this.
36 *
37 * To use closures asynchronously and safely, they must always have a refcount while
38 * they are running owned by the thread that is running them. Otherwise, suppose
39 * you submit some bios and wish to have a function run when they all complete:
40 *
41 * foo_endio(struct bio *bio, int error)
42 * {
43 * closure_put(cl);
44 * }
45 *
46 * closure_init(cl);
47 *
48 * do_stuff();
49 * closure_get(cl);
50 * bio1->bi_endio = foo_endio;
51 * bio_submit(bio1);
52 *
53 * do_more_stuff();
54 * closure_get(cl);
55 * bio2->bi_endio = foo_endio;
56 * bio_submit(bio2);
57 *
58 * continue_at(cl, complete_some_read, system_wq);
59 *
60 * If closure's refcount started at 0, complete_some_read() could run before the
61 * second bio was submitted - which is almost always not what you want! More
62 * importantly, it wouldn't be possible to say whether the original thread or
63 * complete_some_read()'s thread owned the closure - and whatever state it was
64 * associated with!
65 *
66 * So, closure_init() initializes a closure's refcount to 1 - and when a
67 * closure_fn is run, the refcount will be reset to 1 first.
68 *
69 * Then, the rule is - if you got the refcount with closure_get(), release it
70 * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
71 * on a closure because you called closure_init() or you were run out of a
72 * closure - _always_ use continue_at(). Doing so consistently will help
73 * eliminate an entire class of particularly pernicious races.
74 *
75 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
76 *
77 * struct closure_waitlist list;
78 * closure_wait_event(list, cl, condition);
79 * closure_wake_up(list);
80 *
81 * These work analogously to wait_event() and wake_up() - except that instead of
82 * operating on the current thread (for wait_event()) and lists of threads, they
83 * operate on an explicit closure and lists of closures.
84 *
85 * Because it's a closure we can now wait either synchronously or
86 * asynchronously. closure_wait_event() returns the current value of the
87 * condition, and if it returned false continue_at() or closure_sync() can be
88 * used to wait for it to become true.
89 *
90 * It's useful for waiting on things when you can't sleep in the context in
91 * which you must check the condition (perhaps a spinlock held, or you might be
92 * beneath generic_make_request() - in which case you can't sleep on IO).
93 *
94 * closure_wait_event() will wait either synchronously or asynchronously,
95 * depending on whether the closure is in blocking mode or not. You can pick a
96 * mode explicitly with closure_wait_event_sync() and
97 * closure_wait_event_async(), which do just what you might expect.
98 *
99 * Lastly, you might have a wait list dedicated to a specific event, and have no
100 * need for specifying the condition - you just want to wait until someone runs
101 * closure_wake_up() on the appropriate wait list. In that case, just use
102 * closure_wait(). It will return either true or false, depending on whether the
103 * closure was already on a wait list or not - a closure can only be on one wait
104 * list at a time.
105 *
106 * Parents:
107 *
108 * closure_init() takes two arguments - it takes the closure to initialize, and
109 * a (possibly null) parent.
110 *
111 * If parent is non null, the new closure holds a refcount on it for its lifetime;
112 * a closure is considered to be "finished" when its refcount hits 0 and the
113 * function to run is null. Hence
114 *
115 * continue_at(cl, NULL, NULL);
116 *
117 * returns up the (spaghetti) stack of closures, precisely like normal return
118 * returns up the C stack. continue_at() with non null fn is better thought of
119 * as doing a tail call.
120 *
121 * All this implies that a closure should typically be embedded in a particular
122 * struct (which its refcount will normally control the lifetime of), and that
123 * struct can very much be thought of as a stack frame.
124 *
125 * Locking:
126 *
127 * Closures are based on work items but they can be thought of as more like
128 * threads - in that like threads and unlike work items they have a well
129 * defined lifetime; they are created (with closure_init()) and eventually
130 * complete after a continue_at(cl, NULL, NULL).
131 *
132 * Suppose you've got some larger structure with a closure embedded in it that's
133 * used for periodically doing garbage collection. You only want one garbage
134 * collection happening at a time, so the natural thing to do is protect it with
135 * a lock. However, it's difficult to use a lock protecting a closure correctly
136 * because the unlock should come after the last continue_at() (additionally, if
137 * you're using the closure asynchronously a mutex won't work since a mutex has
138 * to be unlocked by the same process that locked it).
139 *
140 * So to make it less error prone and more efficient, we also have the ability
141 * to use closures as locks:
142 *
143 * closure_init_unlocked();
144 * closure_trylock();
145 *
146 * That's all we need for trylock() - the last closure_put() implicitly unlocks
147 * it for you. But for closure_lock(), we also need a wait list:
148 *
149 * struct closure_with_waitlist frobnicator_cl;
150 *
151 * closure_init_unlocked(&frobnicator_cl);
152 * closure_lock(&frobnicator_cl, NULL);
153 *
154 * A closure_with_waitlist embeds a closure and a wait list - much like struct
155 * delayed_work embeds a work item and a timer_list. The important thing is, use
156 * it exactly like you would a regular closure and closure_put() will magically
157 * handle everything for you.
158 *
159 * We've got closures that embed timers, too. They're called, appropriately
160 * enough:
161 * struct closure_with_timer;
162 *
163 * This gives you access to closure_delay(). It takes a refcount for a specified
164 * number of jiffies - you could then call closure_sync() (for a slightly
165 * convoluted version of msleep()) or continue_at() - which gives you the same
166 * effect as using a delayed work item, except you can reuse the work_struct
167 * already embedded in struct closure.
168 *
169 * Lastly, there's struct closure_with_waitlist_and_timer. It does what you
170 * probably expect, if you happen to need the features of both. (You don't
171 * really want to know how all this is implemented, but if I've done my job
172 * right you shouldn't have to care).
173 */
174
/* Forward declaration so closure_fn can be spelled before the full struct. */
struct closure;

/*
 * closure_fn - function type for the callbacks stored in struct closure::fn.
 *
 * Takes the closure as its only argument and returns nothing.
 */
typedef void (closure_fn)(struct closure *);
177
178 struct closure_waitlist {
179 struct llist_head list;
180 };
181
/*
 * closure_type - which closure container struct a closure is embedded in.
 *
 * The names are TYPE_<struct tag> because __CLOSURE_TYPE() builds them by
 * token pasting. MAX_CLOSURE_TYPE aliases the largest valid value.
 */
enum closure_type {
	TYPE_closure,
	TYPE_closure_with_waitlist,
	TYPE_closure_with_timer,
	TYPE_closure_with_waitlist_and_timer,
	MAX_CLOSURE_TYPE = TYPE_closure_with_waitlist_and_timer,
};
189
/*
 * closure_state - flag bits stored in closure::remaining above the refcount.
 *
 * The refcount proper lives in the bits below CLOSURE_BITS_START (see
 * CLOSURE_REMAINING_MASK). Flags occupy every other bit from there up; the
 * bit directly above each flag is left unused and collected in
 * CLOSURE_GUARD_MASK.
 *
 * CLOSURE_DESTRUCTOR: Set by closure_return_with_destructor() in the same
 * closure_sub() that drops the running reference.
 *
 * CLOSURE_BLOCKING: Makes closure_wait_event() block instead of waiting
 * asynchronously.
 *
 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Set by the thread
 * that owns the closure, cleared by the thread that wakes the closure up.
 *
 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep; it
 * says that cl->task is valid and closure_put() may wake it up. Only set or
 * cleared by the thread that owns the closure.
 *
 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING; the closure has an
 * outstanding timer. Set by the thread that owns the closure, cleared by the
 * timer function when the timer goes off.
 *
 * The remaining flags exist for debugging and do not affect behaviour:
 *
 * CLOSURE_RUNNING: Set while a closure is running (i.e. by closure_init() and
 * when closure_put() runs the next function); must be cleared before
 * remaining hits 0. Guards against incorrect usage and accidentally
 * transferring references. continue_at() and closure_return() clear it for
 * you; if you are doing something unusual, clear it by hand with
 * closure_set_stopped(), which also annotates where references are being
 * transferred.
 *
 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a closure
 * with this flag set.
 */
enum closure_state {
	CLOSURE_BITS_START	= (1 << 19),

	CLOSURE_DESTRUCTOR	= (1 << 19),
	CLOSURE_BLOCKING	= (1 << 21),
	CLOSURE_WAITING		= (1 << 23),
	CLOSURE_SLEEPING	= (1 << 25),
	CLOSURE_TIMER		= (1 << 27),
	CLOSURE_RUNNING		= (1 << 29),
	CLOSURE_STACK		= (1 << 31),
};
230
/*
 * Every flag bit shifted up by one: the unused bits that sit directly above
 * each flag in closure::remaining.
 */
#define CLOSURE_GUARD_MASK						\
	((CLOSURE_DESTRUCTOR |						\
	  CLOSURE_BLOCKING |						\
	  CLOSURE_WAITING |						\
	  CLOSURE_SLEEPING |						\
	  CLOSURE_TIMER |						\
	  CLOSURE_RUNNING |						\
	  CLOSURE_STACK) << 1)

/* Bits of closure::remaining that hold the actual reference count. */
#define CLOSURE_REMAINING_MASK		(CLOSURE_BITS_START - 1)

/* Value of closure::remaining for a freshly started closure: one ref, running. */
#define CLOSURE_REMAINING_INITIALIZER	(CLOSURE_RUNNING | 1)
237
238 struct closure {
239 union {
240 struct {
241 struct workqueue_struct *wq;
242 struct task_struct *task;
243 struct llist_node list;
244 closure_fn *fn;
245 };
246 struct work_struct work;
247 };
248
249 struct closure *parent;
250
251 atomic_t remaining;
252
253 enum closure_type type;
254
255 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
256 #define CLOSURE_MAGIC_DEAD 0xc054dead
257 #define CLOSURE_MAGIC_ALIVE 0xc054a11e
258
259 unsigned magic;
260 struct list_head all;
261 unsigned long ip;
262 unsigned long waiting_on;
263 #endif
264 };
265
266 struct closure_with_waitlist {
267 struct closure cl;
268 struct closure_waitlist wait;
269 };
270
271 struct closure_with_timer {
272 struct closure cl;
273 struct timer_list timer;
274 };
275
276 struct closure_with_waitlist_and_timer {
277 struct closure cl;
278 struct closure_waitlist wait;
279 struct timer_list timer;
280 };
281
/*
 * Last arm of the __closure_type() chain, selected only when the argument is
 * none of the known closure structs. Only declared in this header.
 */
extern unsigned invalid_closure_type(void);

/*
 * One link of the chain: "@cl is a struct @_t ? TYPE_@_t :". The dangling
 * ':' is completed by whatever follows the macro invocation.
 */
#define __CLOSURE_TYPE(cl, _t)						\
	__builtin_types_compatible_p(typeof(cl), struct _t)		\
		? TYPE_ ## _t :

/*
 * __closure_type() - map a closure struct (not a pointer to one) to its
 * enum closure_type value, or to invalid_closure_type() for any other type.
 */
#define __closure_type(cl)						\
(									\
	__CLOSURE_TYPE(cl, closure)					\
	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
	__CLOSURE_TYPE(cl, closure_with_timer)				\
	__CLOSURE_TYPE(cl, closure_with_waitlist_and_timer)		\
	invalid_closure_type()						\
)
296
297 void closure_sub(struct closure *cl, int v);
298 void closure_put(struct closure *cl);
299 void closure_queue(struct closure *cl);
300 void __closure_wake_up(struct closure_waitlist *list);
301 bool closure_wait(struct closure_waitlist *list, struct closure *cl);
302 void closure_sync(struct closure *cl);
303
304 bool closure_trylock(struct closure *cl, struct closure *parent);
305 void __closure_lock(struct closure *cl, struct closure *parent,
306 struct closure_waitlist *wait_list);
307
308 void do_closure_timer_init(struct closure *cl);
309 bool __closure_delay(struct closure *cl, unsigned long delay,
310 struct timer_list *timer);
311 void __closure_flush(struct closure *cl, struct timer_list *timer);
312 void __closure_flush_sync(struct closure *cl, struct timer_list *timer);
313
/*
 * Debug hooks: real functions when CONFIG_BCACHE_CLOSURES_DEBUG is set,
 * empty inline stubs otherwise.
 */
#ifndef CONFIG_BCACHE_CLOSURES_DEBUG

static inline void closure_debug_create(struct closure *cl)
{
}

static inline void closure_debug_destroy(struct closure *cl)
{
}

#else /* CONFIG_BCACHE_CLOSURES_DEBUG */

void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#endif /* CONFIG_BCACHE_CLOSURES_DEBUG */
325
/*
 * closure_set_ip() - store _THIS_IP_ in @cl->ip.
 *
 * Does nothing unless CONFIG_BCACHE_CLOSURES_DEBUG is enabled.
 */
static inline void closure_set_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _THIS_IP_;
#endif /* CONFIG_BCACHE_CLOSURES_DEBUG */
}
332
/*
 * closure_set_ret_ip() - store _RET_IP_ in @cl->ip.
 *
 * Does nothing unless CONFIG_BCACHE_CLOSURES_DEBUG is enabled.
 */
static inline void closure_set_ret_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _RET_IP_;
#endif /* CONFIG_BCACHE_CLOSURES_DEBUG */
}
339
340 static inline void closure_get(struct closure *cl)
341 {
342 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
343 BUG_ON((atomic_inc_return(&cl->remaining) &
344 CLOSURE_REMAINING_MASK) <= 1);
345 #else
346 atomic_inc(&cl->remaining);
347 #endif
348 }
349
350 static inline void closure_set_stopped(struct closure *cl)
351 {
352 atomic_sub(CLOSURE_RUNNING, &cl->remaining);
353 }
354
355 static inline bool closure_is_stopped(struct closure *cl)
356 {
357 return !(atomic_read(&cl->remaining) & CLOSURE_RUNNING);
358 }
359
360 static inline bool closure_is_unlocked(struct closure *cl)
361 {
362 return atomic_read(&cl->remaining) == -1;
363 }
364
365 static inline void do_closure_init(struct closure *cl, struct closure *parent,
366 bool running)
367 {
368 switch (cl->type) {
369 case TYPE_closure_with_timer:
370 case TYPE_closure_with_waitlist_and_timer:
371 do_closure_timer_init(cl);
372 default:
373 break;
374 }
375
376 cl->parent = parent;
377 if (parent)
378 closure_get(parent);
379
380 if (running) {
381 closure_debug_create(cl);
382 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
383 } else
384 atomic_set(&cl->remaining, -1);
385
386 closure_set_ip(cl);
387 }
388
/*
 * __to_internal_closure() - get at the struct closure embedded in @cl.
 * @cl: pointer to a struct closure or to one of the closure_with_* structs
 *
 * Hack: this is an unchecked cast, relying on the embedded closure being the
 * first member. The result of __closure_type() is thrown away; it is used
 * merely for type checking at build time.
 *
 * @cl is parenthesized at each use so that expression arguments (e.g.
 * "p + 1") are dereferenced and cast as a whole.
 */
#define __to_internal_closure(cl)					\
({									\
	BUILD_BUG_ON(__closure_type(*(cl)) > MAX_CLOSURE_TYPE);		\
	(struct closure *) (cl);					\
})
399
/*
 * closure_init_type() - record the closure's type, then initialize it.
 * @cl:		pointer to a closure or closure_with_* struct
 * @parent:	parent closure or NULL, passed on to do_closure_init()
 * @running:	passed on to do_closure_init()
 *
 * Does not zero @cl; callers that need that do a memset() first.
 */
#define closure_init_type(cl, parent, running)				\
do {									\
	struct closure *_cl = __to_internal_closure(cl);		\
									\
	_cl->type = __closure_type(*(cl));				\
	do_closure_init(_cl, parent, running);				\
} while (0)
406
/**
 * __closure_init() - Initialize a running closure without zeroing it
 * @cl:		closure to initialize
 * @parent:	parent of the new closure; may be NULL
 *
 * Same as closure_init() minus the memset(); usable when the memory is
 * already known to be zeroed.
 */
#define __closure_init(cl, parent)	closure_init_type(cl, parent, true)
414
/**
 * closure_init() - Zero a closure and initialize it with a refcount of 1
 * @cl:		closure to initialize
 * @parent:	parent of the new closure. cl will take a refcount on it for
 *		its lifetime; may be NULL.
 *
 * The whole object @cl points to is cleared, including any wait list or
 * timer of a closure_with_* struct.
 */
#define closure_init(cl, parent)					\
do {									\
	memset((cl), 0, sizeof(*(cl)));					\
	closure_init_type(cl, parent, true);				\
} while (0)
426
427 static inline void closure_init_stack(struct closure *cl)
428 {
429 memset(cl, 0, sizeof(struct closure));
430 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|
431 CLOSURE_BLOCKING|CLOSURE_STACK);
432 }
433
/**
 * closure_init_unlocked() - Zero a closure and leave it in the unlocked state
 * @cl: closure to initialize
 *
 * Intended for closures that will be used as locks: no parent is attached and
 * the closure is not running. It may not be used until it has been taken with
 * closure_lock() or closure_trylock().
 */
#define closure_init_unlocked(cl)					\
do {									\
	memset((cl), 0, sizeof(*(cl)));					\
	closure_init_type(cl, NULL, false);				\
} while (0)
446
/**
 * closure_lock() - lock and initialize a closure.
 * @cl:		the closure to lock; must be a type with a ->wait member
 * @parent:	the new parent for this closure
 *
 * A wait list is required so that there is something to sleep on under
 * contention, hence the restriction on @cl's type.
 *
 * @parent means the same as it does for closure_init(): if non null, the
 * closure takes a reference on it, which is released on unlock.
 */
#define closure_lock(cl, parent)					\
	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
461
/**
 * closure_delay() - delay some number of jiffies
 * @cl:		the closure that will sleep; must be a type with a ->timer member
 * @delay:	the delay in jiffies
 *
 * A refcount is taken on @cl and dropped again once @delay jiffies have
 * passed. Follow it with continue_at() to run a function after the delay, or
 * with closure_sync() for a convoluted version of msleep().
 */
#define closure_delay(cl, delay)					\
	__closure_delay(__to_internal_closure(cl), delay, &(cl)->timer)
473
/*
 * closure_flush() / closure_flush_sync() - hand the closure and its embedded
 * timer to __closure_flush() / __closure_flush_sync().
 *
 * @cl must be a closure type with a ->timer member.
 */
#define closure_flush(cl)						\
	__closure_flush(__to_internal_closure(cl), &(cl)->timer)

#define closure_flush_sync(cl)						\
	__closure_flush_sync(__to_internal_closure(cl), &(cl)->timer)
479
480 static inline void __closure_end_sleep(struct closure *cl)
481 {
482 __set_current_state(TASK_RUNNING);
483
484 if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
485 atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
486 }
487
488 static inline void __closure_start_sleep(struct closure *cl)
489 {
490 closure_set_ip(cl);
491 cl->task = current;
492 set_current_state(TASK_UNINTERRUPTIBLE);
493
494 if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
495 atomic_add(CLOSURE_SLEEPING, &cl->remaining);
496 }
497
498 /**
499 * closure_blocking() - returns true if the closure is in blocking mode.
500 *
501 * If a closure is in blocking mode, closure_wait_event() will sleep until the
502 * condition is true instead of waiting asynchronously.
503 */
504 static inline bool closure_blocking(struct closure *cl)
505 {
506 return atomic_read(&cl->remaining) & CLOSURE_BLOCKING;
507 }
508
509 /**
510 * set_closure_blocking() - put a closure in blocking mode.
511 *
512 * If a closure is in blocking mode, closure_wait_event() will sleep until the
513 * condition is true instead of waiting asynchronously.
514 *
515 * Not thread safe - can only be called by the thread running the closure.
516 */
517 static inline void set_closure_blocking(struct closure *cl)
518 {
519 if (!closure_blocking(cl))
520 atomic_add(CLOSURE_BLOCKING, &cl->remaining);
521 }
522
523 /*
524 * Not thread safe - can only be called by the thread running the closure.
525 */
526 static inline void clear_closure_blocking(struct closure *cl)
527 {
528 if (closure_blocking(cl))
529 atomic_sub(CLOSURE_BLOCKING, &cl->remaining);
530 }
531
/**
 * closure_wake_up() - wake up all closures on a wait list.
 * @list: the wait list to wake
 *
 * Issues a full memory barrier and then calls __closure_wake_up().
 * NOTE(review): the barrier presumably orders the caller's update of the
 * waited-on condition before the list is examined - confirm.
 */
static inline void closure_wake_up(struct closure_waitlist *list)
{
	smp_mb();
	__closure_wake_up(list);
}
540
/*
 * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 * but for closures.
 *
 * @list:	wait list to wait on
 * @cl:		closure doing the waiting (struct closure *)
 * @condition:	expression for the event; re-evaluated on every iteration
 * @_block:	true to sleep until @condition holds, false to return after
 *		queueing @cl on @list
 *
 * Evaluates to the last value of @condition.
 *
 * The loop is oddly structured so as to avoid a race; we must check the
 * condition again after we've added ourself to the waitlist. We know if we were
 * already on the waitlist because closure_wait() returns false; thus, we only
 * schedule or break if closure_wait() returns false. If it returns true, we
 * just loop again - rechecking the condition.
 *
 * The __closure_wake_up() is necessary because we may race with the event
 * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 * thread that made the event true may have called closure_wake_up() before we
 * added ourself to the wait list.
 *
 * We have to call closure_sync() at the end instead of just
 * __closure_end_sleep() because a different thread might've called
 * closure_wake_up() before us and gotten preempted before they dropped the
 * refcount on our closure. If this was a stack allocated closure, that would be
 * bad.
 *
 * The macro's own locals carry a __cwe_ prefix so that a @condition (or
 * @_block) expression mentioning a caller variable called "block" or "ret"
 * is not silently redirected to them.
 */
#define __closure_wait_event(list, cl, condition, _block)		\
({									\
	bool __cwe_block = (_block);					\
	typeof(condition) __cwe_ret;					\
									\
	while (1) {							\
		__cwe_ret = (condition);				\
		if (__cwe_ret) {					\
			__closure_wake_up(list);			\
			if (__cwe_block)				\
				closure_sync(cl);			\
									\
			break;						\
		}							\
									\
		if (__cwe_block)					\
			__closure_start_sleep(cl);			\
									\
		if (!closure_wait(list, cl)) {				\
			if (!__cwe_block)				\
				break;					\
									\
			schedule();					\
		}							\
	}								\
									\
	__cwe_ret;							\
})
590
/**
 * closure_wait_event() - wait on a condition, synchronously or asynchronously.
 * @list:	the wait list to wait on
 * @cl:		the closure that is doing the waiting
 * @condition:	a C expression for the event to wait for
 *
 * Blocking-mode closures sleep until @condition evaluates to true, exactly
 * like wait_event().
 *
 * Otherwise the wait is asynchronous: if @condition is currently false, @cl is
 * put onto @list and the macro returns. @list then owns a refcount on @cl;
 * closure_sync() or continue_at() may be used later to wait for another
 * thread to wake up @list, which drops that refcount.
 *
 * Returns the value of @condition; @cl will be on @list iff @condition was
 * false.
 *
 * closure_wake_up(@list) must be called after changing any variable that could
 * cause @condition to become true.
 */
#define closure_wait_event(list, cl, condition)				\
	__closure_wait_event(list, cl, condition, closure_blocking(cl))

/* As closure_wait_event(), but always asynchronous regardless of @cl's mode. */
#define closure_wait_event_async(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, false)

/* As closure_wait_event(), but always sleeps until @condition is true. */
#define closure_wait_event_sync(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, true)
619
620 static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
621 struct workqueue_struct *wq)
622 {
623 BUG_ON(object_is_on_stack(cl));
624 closure_set_ip(cl);
625 cl->fn = fn;
626 cl->wq = wq;
627 /* between atomic_dec() in closure_put() */
628 smp_mb__before_atomic_dec();
629 }
630
/*
 * continue_at() - set the next function for @_cl, give up the running
 * reference, and return from the calling function.
 *
 * CLOSURE_RUNNING and one reference are dropped in a single closure_sub().
 * The macro contains a bare "return;", so it can only be used in functions
 * returning void.
 */
#define continue_at(_cl, _fn, _wq)					\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_sub(_cl, 1 + CLOSURE_RUNNING);				\
	return;								\
} while (0)

/* closure_return() - continue_at() with no next function and no workqueue. */
#define closure_return(_cl)						\
	continue_at((_cl), NULL, NULL)
639
/*
 * continue_at_nobarrier() - set the next function for @_cl, queue the closure
 * directly with closure_queue(), and return from the calling function.
 *
 * Unlike continue_at(), no reference is dropped here. The macro contains a
 * bare "return;", so it can only be used in functions returning void.
 *
 * closure_queue() is passed the macro parameter @_cl, so the macro does not
 * depend on the caller having a variable named "cl" in scope.
 */
#define continue_at_nobarrier(_cl, _fn, _wq)				\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_queue(_cl);						\
	return;								\
} while (0)
646
/*
 * closure_return_with_destructor() - finish @_cl, installing @_destructor as
 * its function, and return from the calling function.
 *
 * One closure_sub() drops CLOSURE_RUNNING and one reference while adding
 * CLOSURE_DESTRUCTOR (it is subtracted with a negative sign). The macro
 * contains a bare "return;", so it can only be used in functions returning
 * void.
 */
#define closure_return_with_destructor(_cl, _destructor)		\
do {									\
	set_closure_fn(_cl, _destructor, NULL);				\
	closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);	\
	return;								\
} while (0)
653
654 static inline void closure_call(struct closure *cl, closure_fn fn,
655 struct workqueue_struct *wq,
656 struct closure *parent)
657 {
658 closure_init(cl, parent);
659 continue_at_nobarrier(cl, fn, wq);
660 }
661
662 static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
663 struct workqueue_struct *wq,
664 struct closure *parent)
665 {
666 if (closure_trylock(cl, parent))
667 continue_at_nobarrier(cl, fn, wq);
668 }
669
670 #endif /* _LINUX_CLOSURE_H */
This page took 0.046747 seconds and 5 git commands to generate.