4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/sched.h>
29 #include <linux/smp_lock.h>
30 #include <linux/wait.h>
32 #include <linux/memcontrol.h>
33 #include <linux/mm_inline.h>
34 #include <linux/slab.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/scatterlist.h>
42 #define SLEEP_TIME (HZ/10)
/* Forward declaration: w_make_ov_request() is defined further down in this
 * file, but resync_timer_fn() already installs it as a work callback. */
44 static int w_make_ov_request(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
);
53 /* more endio handlers:
54 atodb_endio in drbd_actlog.c
55 drbd_bm_async_io_complete in drbd_bitmap.c
57 * For all these callbacks, note the following:
58 * The callbacks will be called in irq context by the IDE drivers,
59 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
60 * Try to get the locking right :) */
65 /* About the global_state_lock:
66 * Each state transition on a device holds a read lock. In case we have
67 * to evaluate the sync-after dependencies, we grab a write lock, because
68 * we need stable states on all devices for that.
 * In this file the write lock is taken by resume_next_sg(),
 * suspend_other_sg() and drbd_alter_sa(). */
69 rwlock_t global_state_lock
;
71 /* used for synchronous meta data and bitmap IO
72 * submitted by drbd_md_sync_page_io() */
/* drbd_md_io_complete() - bio completion callback that wakes a waiter.
 * @bio:   the finished bio; its bi_private points at a struct drbd_md_io.
 * @error: completion status handed in by the caller.
 *
 * Signals md_io->event via complete().
 * NOTE(review): in the lines visible here @error is never stored or looked
 * at -- confirm against the complete file that it reaches the waiter. */
74 void drbd_md_io_complete(struct bio
*bio
, int error
)
76 struct drbd_md_io
*md_io
;
78 md_io
= (struct drbd_md_io
*)bio
->bi_private
;
/* wake whoever is waiting on md_io->event */
81 complete(&md_io
->event
);
84 /* reads on behalf of the partner,
85 * "submitted" by the receiver */
/* drbd_endio_read_sec_final() - last step of a completed epoch-entry read.
 * @e: the epoch entry whose read finished; must not be ID_VACANT.
 *
 * While holding mdev->req_lock (irqsave): adds e->size >> 9 to
 * mdev->read_cnt, wakes mdev->ee_wait if mdev->read_ee is empty, and calls
 * __drbd_chk_io_error() when __EE_WAS_ERROR is set in e->flags.
 * After dropping the lock, e->w is queued on mdev->data.work.
 * NOTE(review): no statement removing e from read_ee is visible in this
 * listing although the list_empty() test suggests one -- verify against
 * the complete file. */
87 void drbd_endio_read_sec_final(struct drbd_epoch_entry
*e
) __releases(local
)
89 unsigned long flags
= 0;
90 struct drbd_conf
*mdev
= e
->mdev
;
92 D_ASSERT(e
->block_id
!= ID_VACANT
);
94 spin_lock_irqsave(&mdev
->req_lock
, flags
);
/* size is in bytes; >> 9 turns it into a count of 512-byte units */
95 mdev
->read_cnt
+= e
->size
>> 9;
97 if (list_empty(&mdev
->read_ee
))
98 wake_up(&mdev
->ee_wait
);
99 if (test_bit(__EE_WAS_ERROR
, &e
->flags
))
100 __drbd_chk_io_error(mdev
, FALSE
);
101 spin_unlock_irqrestore(&mdev
->req_lock
, flags
);
/* hand the entry to the worker via the data work queue */
103 drbd_queue_work(&mdev
->data
.work
, &e
->w
);
107 static int is_failed_barrier(int ee_flags
)
109 return (ee_flags
& (EE_IS_BARRIER
|EE_WAS_ERROR
|EE_RESUBMITTED
))
110 == (EE_IS_BARRIER
|EE_WAS_ERROR
);
113 /* writes on behalf of the partner, or resync writes,
114 * "submitted" by the receiver, final stage. */
/* drbd_endio_write_sec_final() - last step of a completed epoch-entry write.
 * @e: the epoch entry whose write finished.
 *
 * A failed barrier request (see is_failed_barrier()) is not completed here:
 * write ordering is bumped to WO_bdev_flush, the entry is unlinked,
 * EE_WAS_ERROR is replaced by EE_RESUBMITTED, the callback becomes
 * w_e_reissue and the entry is queued on mdev->data.work.
 *
 * Otherwise, under mdev->req_lock, e->size >> 9 is added to mdev->writ_cnt
 * and the entry is moved to the tail of mdev->done_ee. Sector and flags are
 * copied to locals first because e may be freed once the lock is dropped.
 * Afterwards drbd_rs_complete_io(), wake_up(&mdev->ee_wait) and
 * drbd_al_complete_io() are called.
 *
 * NOTE(review): the declarations of e_sector, do_wake and is_syncer_req,
 * the end of the barrier branch, and the conditions guarding
 * drbd_rs_complete_io()/wake_up() are not visible in this listing --
 * confirm against the complete file. */
115 static void drbd_endio_write_sec_final(struct drbd_epoch_entry
*e
) __releases(local
)
117 unsigned long flags
= 0;
118 struct drbd_conf
*mdev
= e
->mdev
;
122 int do_al_complete_io
;
124 /* if this is a failed barrier request, disable use of barriers,
125 * and schedule for resubmission */
126 if (is_failed_barrier(e
->flags
)) {
127 drbd_bump_write_ordering(mdev
, WO_bdev_flush
);
128 spin_lock_irqsave(&mdev
->req_lock
, flags
);
129 list_del(&e
->w
.list
);
130 e
->flags
= (e
->flags
& ~EE_WAS_ERROR
) | EE_RESUBMITTED
;
131 e
->w
.cb
= w_e_reissue
;
132 /* put_ldev actually happens below, once we come here again. */
134 spin_unlock_irqrestore(&mdev
->req_lock
, flags
);
135 drbd_queue_work(&mdev
->data
.work
, &e
->w
);
139 D_ASSERT(e
->block_id
!= ID_VACANT
);
141 /* after we moved e to done_ee,
142 * we may no longer access it,
143 * it may be freed/reused already!
144 * (as soon as we release the req_lock) */
145 e_sector
= e
->sector
;
146 do_al_complete_io
= e
->flags
& EE_CALL_AL_COMPLETE_IO
;
147 is_syncer_req
= is_syncer_block_id(e
->block_id
);
149 spin_lock_irqsave(&mdev
->req_lock
, flags
);
150 mdev
->writ_cnt
+= e
->size
>> 9;
151 list_del(&e
->w
.list
); /* has been on active_ee or sync_ee */
152 list_add_tail(&e
->w
.list
, &mdev
->done_ee
);
154 /* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
155 * neither did we wake possibly waiting conflicting requests.
156 * done from "drbd_process_done_ee" within the appropriate w.cb
157 * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
/* remember whether the list this entry came from is now empty */
159 do_wake
= is_syncer_req
160 ? list_empty(&mdev
->sync_ee
)
161 : list_empty(&mdev
->active_ee
);
163 if (test_bit(__EE_WAS_ERROR
, &e
->flags
))
164 __drbd_chk_io_error(mdev
, FALSE
);
165 spin_unlock_irqrestore(&mdev
->req_lock
, flags
);
168 drbd_rs_complete_io(mdev
, e_sector
);
171 wake_up(&mdev
->ee_wait
);
173 if (do_al_complete_io
)
174 drbd_al_complete_io(mdev
, e_sector
);
180 /* writes on behalf of the partner, or resync writes,
181 * "submitted" by the receiver. */
/* drbd_endio_sec() - bio completion callback for epoch-entry I/O.
 * @bio:   the finished bio; bi_private points at the struct drbd_epoch_entry.
 * @error: completion status handed in by the caller.
 *
 * Logs a warning naming direction, error and sector, handles the case of a
 * bio that reports no error but is not BIO_UPTODATE, marks the entry with
 * __EE_WAS_ERROR, drops the bio reference and, once e->pending_bios reaches
 * zero, runs drbd_endio_write_sec_final() or drbd_endio_read_sec_final().
 *
 * NOTE(review): the conditions guarding the first warning and the set_bit(),
 * the statement that actually sets the error, and the is_write test that
 * chooses between the two final handlers are not visible in this listing. */
183 void drbd_endio_sec(struct bio
*bio
, int error
)
185 struct drbd_epoch_entry
*e
= bio
->bi_private
;
186 struct drbd_conf
*mdev
= e
->mdev
;
187 int uptodate
= bio_flagged(bio
, BIO_UPTODATE
);
188 int is_write
= bio_data_dir(bio
) == WRITE
;
191 dev_warn(DEV
, "%s: error=%d s=%llus\n",
192 is_write
? "write" : "read", error
,
193 (unsigned long long)e
->sector
);
194 if (!error
&& !uptodate
) {
195 dev_warn(DEV
, "%s: setting error to -EIO s=%llus\n",
196 is_write
? "write" : "read",
197 (unsigned long long)e
->sector
);
198 /* strange behavior of some lower level drivers...
199 * fail the request by clearing the uptodate flag,
200 * but do not return any error?! */
205 set_bit(__EE_WAS_ERROR
, &e
->flags
);
207 bio_put(bio
); /* no need for the bio anymore */
/* only the completion of the last outstanding bio finishes the entry */
208 if (atomic_dec_and_test(&e
->pending_bios
)) {
210 drbd_endio_write_sec_final(e
);
212 drbd_endio_read_sec_final(e
);
216 /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */
/* drbd_endio_pri() - bio completion callback for a request's private bio.
 * @bio:   the finished bio; bi_private points at the struct drbd_request.
 * @error: completion status handed in by the caller.
 *
 * Logs a warning, handles a bio that reports no error but is not
 * BIO_UPTODATE, and on error selects write_completed_with_error,
 * read_completed_with_error or read_ahead_completed_with_error as the
 * request event. The private bio is released and req->private_bio is
 * replaced by ERR_PTR(error); then __req_mod() is run under mdev->req_lock
 * and complete_master_bio() is called with the result in m.
 *
 * NOTE(review): the declaration of flags, the event chosen when there is no
 * error, and any guards around the first warning and complete_master_bio()
 * are not visible in this listing -- confirm against the complete file. */
218 void drbd_endio_pri(struct bio
*bio
, int error
)
221 struct drbd_request
*req
= bio
->bi_private
;
222 struct drbd_conf
*mdev
= req
->mdev
;
223 struct bio_and_error m
;
224 enum drbd_req_event what
;
225 int uptodate
= bio_flagged(bio
, BIO_UPTODATE
);
228 dev_warn(DEV
, "p %s: error=%d\n",
229 bio_data_dir(bio
) == WRITE
? "write" : "read", error
);
230 if (!error
&& !uptodate
) {
231 dev_warn(DEV
, "p %s: setting error to -EIO\n",
232 bio_data_dir(bio
) == WRITE
? "write" : "read");
233 /* strange behavior of some lower level drivers...
234 * fail the request by clearing the uptodate flag,
235 * but do not return any error?! */
239 /* to avoid recursion in __req_mod */
240 if (unlikely(error
)) {
241 what
= (bio_data_dir(bio
) == WRITE
)
242 ? write_completed_with_error
243 : (bio_rw(bio
) == READ
)
244 ? read_completed_with_error
245 : read_ahead_completed_with_error
;
/* drop the private bio and leave the error code in its place */
249 bio_put(req
->private_bio
);
250 req
->private_bio
= ERR_PTR(error
);
252 spin_lock_irqsave(&mdev
->req_lock
, flags
);
253 __req_mod(req
, what
, &m
);
254 spin_unlock_irqrestore(&mdev
->req_lock
, flags
);
257 complete_master_bio(mdev
, &m
);
/* w_io_error() - worker callback for a request that had a local write error.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_request.
 * @cancel: cancel indication passed to every work callback.
 *
 * NOTE(review): apart from recovering the request from @w, no statement of
 * the body (including the return value) is visible in this listing --
 * confirm against the complete file. */
260 int w_io_error(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
262 struct drbd_request
*req
= container_of(w
, struct drbd_request
, w
);
264 /* NOTE: mdev->ldev can be NULL by the time we get here! */
265 /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
267 /* the only way this callback is scheduled is from _req_may_be_done,
268 * when it is done and had a local write error, see comments there */
/* w_read_retry_remote() - worker callback: retry a failed local read on the peer.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_request.
 * @cancel: cancel indication passed to every work callback.
 *
 * Under mdev->req_lock the connection and peer-disk state are checked; when
 * the connection is below C_CONNECTED or the peer disk is at most
 * D_INCONSISTENT, the request gets the send_canceled event and an alert is
 * logged. Otherwise the read is forwarded through w_send_read_req() with
 * cancel == 0, whose result is returned.
 *
 * NOTE(review): the opening of the if-condition (line 283 of the original)
 * and the return of the failure branch are not visible in this listing. */
274 int w_read_retry_remote(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
276 struct drbd_request
*req
= container_of(w
, struct drbd_request
, w
);
278 /* We should not detach for read io-error,
279 * but try to WRITE the P_DATA_REPLY to the failed location,
280 * to give the disk the chance to relocate that block */
282 spin_lock_irq(&mdev
->req_lock
);
284 mdev
->state
.conn
< C_CONNECTED
||
285 mdev
->state
.pdsk
<= D_INCONSISTENT
) {
286 _req_mod(req
, send_canceled
);
287 spin_unlock_irq(&mdev
->req_lock
);
288 dev_alert(DEV
, "WE ARE LOST. Local IO failure, no peer.\n");
291 spin_unlock_irq(&mdev
->req_lock
);
293 return w_send_read_req(mdev
, w
, 0);
296 int w_resync_inactive(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
298 ERR_IF(cancel
) return 1;
299 dev_err(DEV
, "resync inactive, but callback triggered??\n");
300 return 1; /* Simply ignore this! */
/* drbd_csum_ee() - hash the payload pages of an epoch entry.
 * @mdev:   DRBD device.
 * @tfm:    hash transform to use.
 * @e:      epoch entry whose page chain (e->pages) is hashed.
 * @digest: output buffer handed to crypto_hash_final().
 *
 * Walks the page chain with page_chain_next(): every page that has a
 * successor is fed to the hash as a full PAGE_SIZE; the last page
 * contributes e->size & (PAGE_SIZE - 1) bytes, or PAGE_SIZE when that
 * remainder is zero.
 *
 * NOTE(review): the declarations of tmp and len, the set-up of desc, and the
 * statement advancing page inside the loop are not visible in this listing. */
303 void drbd_csum_ee(struct drbd_conf
*mdev
, struct crypto_hash
*tfm
, struct drbd_epoch_entry
*e
, void *digest
)
305 struct hash_desc desc
;
306 struct scatterlist sg
;
307 struct page
*page
= e
->pages
;
314 sg_init_table(&sg
, 1);
315 crypto_hash_init(&desc
);
317 while ((tmp
= page_chain_next(page
))) {
318 /* all but the last page will be fully used */
319 sg_set_page(&sg
, page
, PAGE_SIZE
, 0);
320 crypto_hash_update(&desc
, &sg
, sg
.length
);
323 /* and now the last, possibly only partially used page */
324 len
= e
->size
& (PAGE_SIZE
- 1);
325 sg_set_page(&sg
, page
, len
?: PAGE_SIZE
, 0);
326 crypto_hash_update(&desc
, &sg
, sg
.length
);
327 crypto_hash_final(&desc
, digest
);
/* drbd_csum_bio() - hash the payload of a bio.
 * @mdev:   DRBD device.
 * @tfm:    hash transform to use.
 * @bio:    bio whose segments are hashed, starting at segment index 0.
 * @digest: output buffer handed to crypto_hash_final().
 *
 * Each segment (page, length, offset) is loaded into a one-entry
 * scatterlist and fed to crypto_hash_update().
 *
 * NOTE(review): the declaration of i and the set-up of desc are not visible
 * in this listing. */
330 void drbd_csum_bio(struct drbd_conf
*mdev
, struct crypto_hash
*tfm
, struct bio
*bio
, void *digest
)
332 struct hash_desc desc
;
333 struct scatterlist sg
;
334 struct bio_vec
*bvec
;
340 sg_init_table(&sg
, 1);
341 crypto_hash_init(&desc
);
343 __bio_for_each_segment(bvec
, bio
, i
, 0) {
344 sg_set_page(&sg
, bvec
->bv_page
, bvec
->bv_len
, bvec
->bv_offset
);
345 crypto_hash_update(&desc
, &sg
, sg
.length
);
347 crypto_hash_final(&desc
, digest
);
/* w_e_send_csum() - worker callback: send the checksum of a locally read block.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_epoch_entry.
 * @cancel: cancel indication; when set the entry is freed.
 *
 * Installed by read_for_csum(). When the read succeeded (EE_WAS_ERROR
 * clear), a digest buffer sized for mdev->csums_tfm is allocated, filled by
 * drbd_csum_ee() and sent with drbd_send_drequest_csum() after
 * inc_rs_pending(). The entry is freed with drbd_free_ee().
 *
 * NOTE(review): the declarations of digest_size, digest and ok, the
 * remaining arguments of drbd_send_drequest_csum(), the branch structure
 * around the error messages and all return statements are not visible in
 * this listing -- confirm against the complete file. */
350 static int w_e_send_csum(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
352 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
357 D_ASSERT(e
->block_id
== DRBD_MAGIC
+ 0xbeef);
359 if (unlikely(cancel
)) {
360 drbd_free_ee(mdev
, e
);
364 if (likely((e
->flags
& EE_WAS_ERROR
) == 0)) {
365 digest_size
= crypto_hash_digestsize(mdev
->csums_tfm
);
366 digest
= kmalloc(digest_size
, GFP_NOIO
);
368 drbd_csum_ee(mdev
, mdev
->csums_tfm
, e
, digest
);
370 inc_rs_pending(mdev
);
371 ok
= drbd_send_drequest_csum(mdev
,
379 dev_err(DEV
, "kmalloc() of digest failed.\n");
385 drbd_free_ee(mdev
, e
);
388 dev_err(DEV
, "drbd_send_drequest(..., csum) failed\n");
/* Allocation flags for read_for_csum(): failure is tolerated and not warned
 * about, see the comment at the drbd_alloc_ee() call below. */
392 #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
/* read_for_csum() - submit a local read whose checksum will be sent to the peer.
 * @mdev:   DRBD device.
 * @sector: start sector of the block.
 * @size:   size of the block in bytes.
 *
 * Allocates an epoch entry tagged DRBD_MAGIC+0xbeef, puts it on
 * mdev->read_ee under req_lock, installs w_e_send_csum as its callback and
 * submits it as a READ.
 *
 * The caller in w_make_resync_request() interprets the result as
 * 0 = disk failure, 1 = ok, 2 = allocation failed.
 * NOTE(review): the return statements themselves, the ldev reference
 * handling and the check of the allocation result are not visible in this
 * listing -- confirm against the complete file. */
394 static int read_for_csum(struct drbd_conf
*mdev
, sector_t sector
, int size
)
396 struct drbd_epoch_entry
*e
;
401 /* GFP_TRY, because if there is no memory available right now, this may
402 * be rescheduled for later. It is "only" background resync, after all. */
403 e
= drbd_alloc_ee(mdev
, DRBD_MAGIC
+0xbeef, sector
, size
, GFP_TRY
);
407 spin_lock_irq(&mdev
->req_lock
);
408 list_add(&e
->w
.list
, &mdev
->read_ee
);
409 spin_unlock_irq(&mdev
->req_lock
);
411 e
->w
.cb
= w_e_send_csum
;
412 if (drbd_submit_ee(mdev
, e
, READ
, DRBD_FAULT_RS_RD
) == 0)
415 drbd_free_ee(mdev
, e
);
/* resync_timer_fn() - timer callback that schedules the next resync step.
 * @data: the struct drbd_conf pointer, cast to unsigned long.
 *
 * Under mdev->req_lock, selects the callback of mdev->resync_work: unless
 * STOP_SYNC_TIMER was set (it is cleared by the test), w_make_ov_request is
 * used in state C_VERIFY_S and w_make_resync_request otherwise; the third
 * assignment installs w_resync_inactive. Afterwards the work item is queued
 * on mdev->data.work if it is not already on a list and queue is set.
 *
 * NOTE(review): the declarations of flags and queue, the assignments to
 * queue and the else keywords separating the three callback assignments are
 * not visible in this listing -- confirm against the complete file. */
421 void resync_timer_fn(unsigned long data
)
424 struct drbd_conf
*mdev
= (struct drbd_conf
*) data
;
427 spin_lock_irqsave(&mdev
->req_lock
, flags
);
429 if (likely(!test_and_clear_bit(STOP_SYNC_TIMER
, &mdev
->flags
))) {
431 if (mdev
->state
.conn
== C_VERIFY_S
)
432 mdev
->resync_work
.cb
= w_make_ov_request
;
434 mdev
->resync_work
.cb
= w_make_resync_request
;
437 mdev
->resync_work
.cb
= w_resync_inactive
;
440 spin_unlock_irqrestore(&mdev
->req_lock
, flags
);
442 /* harmless race: list_empty outside data.work.q_lock */
443 if (list_empty(&mdev
->resync_work
.list
) && queue
)
444 drbd_queue_work(&mdev
->data
.work
, &mdev
->resync_work
);
447 static int calc_resync_rate(struct drbd_conf
*mdev
)
449 int d
= mdev
->data_delay
/ 1000; /* us -> ms */
450 int td
= mdev
->sync_conf
.throttle_th
* 100; /* 0.1s -> ms */
451 int hd
= mdev
->sync_conf
.hold_off_th
* 100; /* 0.1s -> ms */
452 int cr
= mdev
->sync_conf
.rate
;
454 return d
<= td
? cr
:
456 cr
+ (cr
* (td
- d
) / (hd
- td
));
/* w_make_resync_request() - worker callback that issues resync requests.
 * @mdev:   DRBD device.
 * @w:      work object (mdev->resync_work).
 * @cancel: cancel indication passed to every work callback.
 *
 * Outline of the visible code:
 *  - sanity checks on the connection state and on the local disk;
 *  - the request budget "number" is computed from SLEEP_TIME and the rate
 *    returned by calc_resync_rate(), and compared, together with the
 *    current rs_pending_cnt, against a limit derived from the data
 *    socket's sk_rcvbuf;
 *  - for each request: stop when more than half of the send buffer is
 *    queued, find the next set bitmap bit from mdev->bm_resync_fo, claim
 *    the extent with drbd_try_rs_begin_io(), optionally merge adjacent
 *    dirty bits up to max_segment_size, clip the last request to the
 *    device capacity, then either read locally for a checksum
 *    (read_for_csum(), when the peer speaks protocol >= 89 and csums_tfm
 *    is set) or send a P_RS_DATA_REQUEST;
 *  - finally either mark the resync work inactive (all bits requested) or
 *    re-arm mdev->resync_timer for SLEEP_TIME jiffies.
 *
 * NOTE(review): many lines of this function (declarations of bit and
 * sector, return/goto/break statements, labels, else branches, the
 * assignment of align, put_ldev calls) are not visible in this listing --
 * confirm every control-flow detail against the complete file. */
459 int w_make_resync_request(struct drbd_conf
*mdev
,
460 struct drbd_work
*w
, int cancel
)
464 const sector_t capacity
= drbd_get_capacity(mdev
->this_bdev
);
465 int max_segment_size
;
466 int number
, i
, size
, pe
, mx
;
467 int align
, queued
, sndbuf
;
469 if (unlikely(cancel
))
472 if (unlikely(mdev
->state
.conn
< C_CONNECTED
)) {
473 dev_err(DEV
, "Confused in w_make_resync_request()! cstate < Connected");
477 if (mdev
->state
.conn
!= C_SYNC_TARGET
)
478 dev_err(DEV
, "%s in w_make_resync_request\n",
479 drbd_conn_str(mdev
->state
.conn
));
481 if (!get_ldev(mdev
)) {
482 /* Since we only need to access mdev->rsync a
483 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
484 to continue resync with a broken disk makes no sense at
486 dev_err(DEV
, "Disk broke down during resync!\n");
487 mdev
->resync_work
.cb
= w_resync_inactive
;
491 /* starting with drbd 8.3.8, we can handle multi-bio EEs,
492 * if it should be necessary */
493 max_segment_size
= mdev
->agreed_pro_version
< 94 ?
494 queue_max_segment_size(mdev
->rq_queue
) : DRBD_MAX_SEGMENT_SIZE
;
496 mdev
->c_sync_rate
= calc_resync_rate(mdev
);
497 number
= SLEEP_TIME
* mdev
->c_sync_rate
/ ((BM_BLOCK_SIZE
/ 1024) * HZ
);
498 pe
= atomic_read(&mdev
->rs_pending_cnt
);
500 mutex_lock(&mdev
->data
.mutex
);
501 if (mdev
->data
.socket
)
502 mx
= mdev
->data
.socket
->sk
->sk_rcvbuf
/ sizeof(struct p_block_req
);
505 mutex_unlock(&mdev
->data
.mutex
);
507 /* For resync rates >160MB/sec, allow more pending RS requests */
511 /* Limit the number of pending RS requests to no more than the peer's receive buffer */
512 if ((pe
+ number
) > mx
) {
516 for (i
= 0; i
< number
; i
++) {
517 /* Stop generating RS requests, when half of the send buffer is filled */
518 mutex_lock(&mdev
->data
.mutex
);
519 if (mdev
->data
.socket
) {
520 queued
= mdev
->data
.socket
->sk
->sk_wmem_queued
;
521 sndbuf
= mdev
->data
.socket
->sk
->sk_sndbuf
;
526 mutex_unlock(&mdev
->data
.mutex
);
527 if (queued
> sndbuf
/ 2)
531 size
= BM_BLOCK_SIZE
;
532 bit
= drbd_bm_find_next(mdev
, mdev
->bm_resync_fo
);
535 mdev
->bm_resync_fo
= drbd_bm_bits(mdev
);
536 mdev
->resync_work
.cb
= w_resync_inactive
;
541 sector
= BM_BIT_TO_SECT(bit
);
543 if (drbd_try_rs_begin_io(mdev
, sector
)) {
544 mdev
->bm_resync_fo
= bit
;
547 mdev
->bm_resync_fo
= bit
+ 1;
549 if (unlikely(drbd_bm_test_bit(mdev
, bit
) == 0)) {
550 drbd_rs_complete_io(mdev
, sector
);
554 #if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
555 /* try to find some adjacent bits.
556 * we stop if we have already the maximum req size.
558 * Additionally always align bigger requests, in order to
559 * be prepared for all stripe sizes of software RAIDs.
563 if (size
+ BM_BLOCK_SIZE
> max_segment_size
)
566 /* Be always aligned */
567 if (sector
& ((1<<(align
+3))-1))
570 /* do not cross extent boundaries */
571 if (((bit
+1) & BM_BLOCKS_PER_BM_EXT_MASK
) == 0)
573 /* now, is it actually dirty, after all?
574 * caution, drbd_bm_test_bit is tri-state for some
575 * obscure reason; ( b == 0 ) would get the out-of-band
576 * only accidentally right because of the "oddly sized"
577 * adjustment below */
578 if (drbd_bm_test_bit(mdev
, bit
+1) != 1)
581 size
+= BM_BLOCK_SIZE
;
582 if ((BM_BLOCK_SIZE
<< align
) <= size
)
586 /* if we merged some,
587 * reset the offset to start the next drbd_bm_find_next from */
588 if (size
> BM_BLOCK_SIZE
)
589 mdev
->bm_resync_fo
= bit
+ 1;
592 /* adjust very last sectors, in case we are oddly sized */
593 if (sector
+ (size
>>9) > capacity
)
594 size
= (capacity
-sector
)<<9;
595 if (mdev
->agreed_pro_version
>= 89 && mdev
->csums_tfm
) {
596 switch (read_for_csum(mdev
, sector
, size
)) {
597 case 0: /* Disk failure*/
600 case 2: /* Allocation failed */
601 drbd_rs_complete_io(mdev
, sector
);
602 mdev
->bm_resync_fo
= BM_SECT_TO_BIT(sector
);
604 /* case 1: everything ok */
607 inc_rs_pending(mdev
);
608 if (!drbd_send_drequest(mdev
, P_RS_DATA_REQUEST
,
609 sector
, size
, ID_SYNCER
)) {
610 dev_err(DEV
, "drbd_send_drequest() failed, aborting...\n");
611 dec_rs_pending(mdev
);
618 if (mdev
->bm_resync_fo
>= drbd_bm_bits(mdev
)) {
619 /* last syncer _request_ was sent,
620 * but the P_RS_DATA_REPLY not yet received. sync will end (and
621 * next sync group will resume), as soon as we receive the last
622 * resync data block, and the last bit is cleared.
623 * until then resync "work" is "inactive" ...
625 mdev
->resync_work
.cb
= w_resync_inactive
;
631 mod_timer(&mdev
->resync_timer
, jiffies
+ SLEEP_TIME
);
/* w_make_ov_request() - worker callback that issues online-verify requests.
 * @mdev:   DRBD device.
 * @w:      work object (mdev->resync_work).
 * @cancel: cancel indication passed to every work callback.
 *
 * Computes a request budget from SLEEP_TIME and mdev->sync_conf.rate,
 * reduces it by the number of requests still pending, then walks forward
 * from mdev->ov_position in steps of BM_SECT_PER_BIT sectors: each block is
 * claimed with drbd_try_rs_begin_io(), clipped to the device capacity and
 * requested with drbd_send_ov_request(). When the capacity is reached the
 * resync work callback becomes w_resync_inactive. The position reached is
 * stored back in mdev->ov_position and mdev->resync_timer is re-armed for
 * SLEEP_TIME jiffies.
 *
 * NOTE(review): the declarations of number, i, size and sector, and all
 * return/goto/break statements and labels are not visible in this listing
 * -- confirm against the complete file. */
636 static int w_make_ov_request(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
640 const sector_t capacity
= drbd_get_capacity(mdev
->this_bdev
);
642 if (unlikely(cancel
))
645 if (unlikely(mdev
->state
.conn
< C_CONNECTED
)) {
646 dev_err(DEV
, "Confused in w_make_ov_request()! cstate < Connected");
650 number
= SLEEP_TIME
*mdev
->sync_conf
.rate
/ ((BM_BLOCK_SIZE
/1024)*HZ
);
651 if (atomic_read(&mdev
->rs_pending_cnt
) > number
)
654 number
-= atomic_read(&mdev
->rs_pending_cnt
);
656 sector
= mdev
->ov_position
;
657 for (i
= 0; i
< number
; i
++) {
658 if (sector
>= capacity
) {
659 mdev
->resync_work
.cb
= w_resync_inactive
;
663 size
= BM_BLOCK_SIZE
;
665 if (drbd_try_rs_begin_io(mdev
, sector
)) {
666 mdev
->ov_position
= sector
;
/* clip the final request to the end of the device */
670 if (sector
+ (size
>>9) > capacity
)
671 size
= (capacity
-sector
)<<9;
673 inc_rs_pending(mdev
);
674 if (!drbd_send_ov_request(mdev
, sector
, size
)) {
675 dec_rs_pending(mdev
);
678 sector
+= BM_SECT_PER_BIT
;
680 mdev
->ov_position
= sector
;
683 mod_timer(&mdev
->resync_timer
, jiffies
+ SLEEP_TIME
);
/* w_ov_finished() - worker callback run when online verify has finished.
 * @mdev:   DRBD device.
 * @w:      work object.
 * @cancel: cancel indication passed to every work callback.
 *
 * Calls drbd_resync_finished().
 * NOTE(review): the embedded numbering shows lines missing before and after
 * that call (including the return statement) -- confirm against the
 * complete file what else happens here, e.g. to @w. */
688 int w_ov_finished(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
692 drbd_resync_finished(mdev
);
/* w_resync_finished() - worker callback that retries drbd_resync_finished().
 * @mdev:   DRBD device.
 * @w:      work object; drbd_resync_finished() allocates it with kmalloc()
 *          when it has to defer itself.
 * @cancel: cancel indication passed to every work callback.
 *
 * NOTE(review): the embedded numbering shows lines missing before and after
 * the call (including the return statement and whatever releases @w) --
 * confirm against the complete file. */
697 static int w_resync_finished(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
701 drbd_resync_finished(mdev
);
/* drbd_resync_finished() - wrap up a resync or online-verify run.
 * @mdev: DRBD device.
 *
 * Outline of the visible code:
 *  - if the resync LRU cannot be emptied yet (drbd_rs_del_all() != 0),
 *    sleep for HZ/10 and queue a freshly allocated work item running
 *    w_resync_finished to try again;
 *  - compute the elapsed time dt (seconds, excluding paused time) and the
 *    throughput dbdt for the log message;
 *  - under mdev->req_lock: bail out if the connection state is already
 *    <= C_CONNECTED, otherwise build the new state ns with
 *    conn = C_CONNECTED, log the summary, and depending on whether this was
 *    a verify run, whether blocks failed (mdev->rs_failed) and which side
 *    we were, pick the new disk/pdsk states, update the UUIDs and choose
 *    the helper command ("out-of-sync" or "after-resync-target");
 *    finally apply ns with _drbd_set_state();
 *  - after unlocking: reset mdev->ov_start_sector, write out the whole
 *    bitmap if WRITE_BM_AFTER_RESYNC was set, and run drbd_khelper().
 *
 * NOTE(review): many lines (declarations of n_oos, w and i, the computation
 * of db, os = ns = mdev->state, else branches, goto/labels, resets of the
 * rs_* counters, put_ldev, the guard around drbd_khelper() and the return
 * statements) are not visible in this listing -- confirm every detail
 * against the complete file. */
706 int drbd_resync_finished(struct drbd_conf
*mdev
)
708 unsigned long db
, dt
, dbdt
;
710 union drbd_state os
, ns
;
712 char *khelper_cmd
= NULL
;
714 /* Remove all elements from the resync LRU. Since future actions
715 * might set bits in the (main) bitmap, then the entries in the
716 * resync LRU would be wrong. */
717 if (drbd_rs_del_all(mdev
)) {
718 /* In case this is not possible now, most probably because
719 * there are P_RS_DATA_REPLY Packets lingering on the worker's
720 * queue (or even the read operations for those packets
721 * is not finished by now). Retry in 100ms. */
724 __set_current_state(TASK_INTERRUPTIBLE
);
725 schedule_timeout(HZ
/ 10);
726 w
= kmalloc(sizeof(struct drbd_work
), GFP_ATOMIC
);
728 w
->cb
= w_resync_finished
;
729 drbd_queue_work(&mdev
->data
.work
, w
);
732 dev_err(DEV
, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
/* run time in seconds, not counting the time spent paused */
735 dt
= (jiffies
- mdev
->rs_start
- mdev
->rs_paused
) / HZ
;
739 dbdt
= Bit2KB(db
/dt
);
740 mdev
->rs_paused
/= HZ
;
745 spin_lock_irq(&mdev
->req_lock
);
748 /* This protects us against multiple calls (that can happen in the presence
749 of application IO), and against connectivity loss just before we arrive here. */
750 if (os
.conn
<= C_CONNECTED
)
754 ns
.conn
= C_CONNECTED
;
756 dev_info(DEV
, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
757 (os
.conn
== C_VERIFY_S
|| os
.conn
== C_VERIFY_T
) ?
758 "Online verify " : "Resync",
759 dt
+ mdev
->rs_paused
, mdev
->rs_paused
, dbdt
);
761 n_oos
= drbd_bm_total_weight(mdev
);
763 if (os
.conn
== C_VERIFY_S
|| os
.conn
== C_VERIFY_T
) {
765 dev_alert(DEV
, "Online verify found %lu %dk block out of sync!\n",
767 khelper_cmd
= "out-of-sync";
770 D_ASSERT((n_oos
- mdev
->rs_failed
) == 0);
772 if (os
.conn
== C_SYNC_TARGET
|| os
.conn
== C_PAUSED_SYNC_T
)
773 khelper_cmd
= "after-resync-target";
775 if (mdev
->csums_tfm
&& mdev
->rs_total
) {
776 const unsigned long s
= mdev
->rs_same_csum
;
777 const unsigned long t
= mdev
->rs_total
;
780 (t
< 100000) ? ((s
*100)/t
) : (s
/(t
/100));
781 dev_info(DEV
, "%u %% had equal check sums, eliminated: %luK; "
782 "transferred %luK total %luK\n",
784 Bit2KB(mdev
->rs_same_csum
),
785 Bit2KB(mdev
->rs_total
- mdev
->rs_same_csum
),
786 Bit2KB(mdev
->rs_total
));
790 if (mdev
->rs_failed
) {
791 dev_info(DEV
, " %lu failed blocks\n", mdev
->rs_failed
);
793 if (os
.conn
== C_SYNC_TARGET
|| os
.conn
== C_PAUSED_SYNC_T
) {
794 ns
.disk
= D_INCONSISTENT
;
795 ns
.pdsk
= D_UP_TO_DATE
;
797 ns
.disk
= D_UP_TO_DATE
;
798 ns
.pdsk
= D_INCONSISTENT
;
801 ns
.disk
= D_UP_TO_DATE
;
802 ns
.pdsk
= D_UP_TO_DATE
;
804 if (os
.conn
== C_SYNC_TARGET
|| os
.conn
== C_PAUSED_SYNC_T
) {
807 for (i
= UI_BITMAP
; i
<= UI_HISTORY_END
; i
++)
808 _drbd_uuid_set(mdev
, i
, mdev
->p_uuid
[i
]);
809 drbd_uuid_set(mdev
, UI_BITMAP
, mdev
->ldev
->md
.uuid
[UI_CURRENT
]);
810 _drbd_uuid_set(mdev
, UI_CURRENT
, mdev
->p_uuid
[UI_CURRENT
]);
812 dev_err(DEV
, "mdev->p_uuid is NULL! BUG\n");
816 drbd_uuid_set_bm(mdev
, 0UL);
819 /* Now the two UUID sets are equal, update what we
820 * know of the peer. */
822 for (i
= UI_CURRENT
; i
<= UI_HISTORY_END
; i
++)
823 mdev
->p_uuid
[i
] = mdev
->ldev
->md
.uuid
[i
];
827 _drbd_set_state(mdev
, ns
, CS_VERBOSE
, NULL
);
829 spin_unlock_irq(&mdev
->req_lock
);
835 mdev
->ov_start_sector
= 0;
837 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC
, &mdev
->flags
)) {
838 dev_warn(DEV
, "Writing the whole bitmap, due to failed kmalloc\n");
839 drbd_queue_bitmap_io(mdev
, &drbd_bm_write
, NULL
, "write from resync_finished");
843 drbd_khelper(mdev
, khelper_cmd
);
849 static void move_to_net_ee_or_free(struct drbd_conf
*mdev
, struct drbd_epoch_entry
*e
)
851 if (drbd_ee_has_active_page(e
)) {
852 /* This might happen if sendpage() has not finished */
853 spin_lock_irq(&mdev
->req_lock
);
854 list_add_tail(&e
->w
.list
, &mdev
->net_ee
);
855 spin_unlock_irq(&mdev
->req_lock
);
857 drbd_free_ee(mdev
, e
);
861 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
862 * @mdev: DRBD device.
864 * @cancel: The connection will be closed anyways
/* Worker callback answering a data request from the peer.
 * When the local read succeeded (EE_WAS_ERROR clear) the block is sent as
 * P_DATA_REPLY; otherwise a rate-limited error is logged and P_NEG_DREPLY
 * is sent. When @cancel is set the entry is simply freed. The entry is then
 * handed to move_to_net_ee_or_free().
 * NOTE(review): the declaration of ok, the else keyword, the condition
 * guarding the final error message and all return statements are not
 * visible in this listing -- confirm against the complete file. */
866 int w_e_end_data_req(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
868 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
871 if (unlikely(cancel
)) {
872 drbd_free_ee(mdev
, e
);
877 if (likely((e
->flags
& EE_WAS_ERROR
) == 0)) {
878 ok
= drbd_send_block(mdev
, P_DATA_REPLY
, e
);
880 if (__ratelimit(&drbd_ratelimit_state
))
881 dev_err(DEV
, "Sending NegDReply. sector=%llus.\n",
882 (unsigned long long)e
->sector
);
884 ok
= drbd_send_ack(mdev
, P_NEG_DREPLY
, e
);
889 move_to_net_ee_or_free(mdev
, e
);
892 dev_err(DEV
, "drbd_send_block() failed\n");
897 /* w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
898 * @mdev: DRBD device.
900 * @cancel: The connection will be closed anyway */
/* Worker callback answering a resync data request from the peer.
 * When @cancel is set the entry is freed. If a local-disk reference can be
 * taken in state D_FAILED or better, drbd_rs_complete_io() is called for
 * the sector. On a successful read the block is sent as P_RS_DATA_REPLY
 * (after inc_rs_pending()) provided the peer disk is at least
 * D_INCONSISTENT; otherwise only a rate-limited message is logged. On a
 * failed read P_NEG_RS_DREPLY is sent and the failure is recorded with
 * drbd_rs_failed_io(). The entry is then handed to
 * move_to_net_ee_or_free().
 * NOTE(review): the declaration of ok, the put_ldev() matching the get,
 * the else keywords, the condition guarding the final error message and
 * all return statements are not visible in this listing. */
902 int w_e_end_rsdata_req(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
904 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
907 if (unlikely(cancel
)) {
908 drbd_free_ee(mdev
, e
);
913 if (get_ldev_if_state(mdev
, D_FAILED
)) {
914 drbd_rs_complete_io(mdev
, e
->sector
);
918 if (likely((e
->flags
& EE_WAS_ERROR
) == 0)) {
919 if (likely(mdev
->state
.pdsk
>= D_INCONSISTENT
)) {
920 inc_rs_pending(mdev
);
921 ok
= drbd_send_block(mdev
, P_RS_DATA_REPLY
, e
);
923 if (__ratelimit(&drbd_ratelimit_state
))
924 dev_err(DEV
, "Not sending RSDataReply, "
925 "partner DISKLESS!\n");
929 if (__ratelimit(&drbd_ratelimit_state
))
930 dev_err(DEV
, "Sending NegRSDReply. sector %llus.\n",
931 (unsigned long long)e
->sector
);
933 ok
= drbd_send_ack(mdev
, P_NEG_RS_DREPLY
, e
);
935 /* update resync data with failure */
936 drbd_rs_failed_io(mdev
, e
->sector
, e
->size
);
941 move_to_net_ee_or_free(mdev
, e
);
944 dev_err(DEV
, "drbd_send_block() failed\n");
/* w_e_end_csum_rs_req() - worker callback answering a checksum based resync request.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_epoch_entry.
 * @cancel: cancel indication; when set the entry is freed.
 *
 * e->block_id carries a pointer to the struct digest_info received from the
 * peer. After a successful local read the local digest (mdev->csums_tfm) is
 * compared with the peer's: on equality the range is marked in sync,
 * mdev->rs_same_csum is advanced and P_RS_IS_IN_SYNC is sent; otherwise the
 * block itself is sent as P_RS_DATA_REPLY with block_id reset to ID_SYNCER.
 * A failed read is answered with P_NEG_RS_DREPLY.
 *
 * NOTE(review): the declarations of digest, digest_size, eq and ok, the
 * if (eq)/else structure, the freeing of digest and di, the guard around
 * the final error message and all return statements are not visible in
 * this listing -- confirm against the complete file. */
948 int w_e_end_csum_rs_req(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
950 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
951 struct digest_info
*di
;
956 if (unlikely(cancel
)) {
957 drbd_free_ee(mdev
, e
);
962 drbd_rs_complete_io(mdev
, e
->sector
);
964 di
= (struct digest_info
*)(unsigned long)e
->block_id
;
966 if (likely((e
->flags
& EE_WAS_ERROR
) == 0)) {
967 /* quick hack to try to avoid a race against reconfiguration.
968 * a real fix would be much more involved,
969 * introducing more locking mechanisms */
970 if (mdev
->csums_tfm
) {
971 digest_size
= crypto_hash_digestsize(mdev
->csums_tfm
);
972 D_ASSERT(digest_size
== di
->digest_size
);
973 digest
= kmalloc(digest_size
, GFP_NOIO
);
976 drbd_csum_ee(mdev
, mdev
->csums_tfm
, e
, digest
);
977 eq
= !memcmp(digest
, di
->digest
, digest_size
);
982 drbd_set_in_sync(mdev
, e
->sector
, e
->size
);
983 /* rs_same_csums unit is BM_BLOCK_SIZE */
984 mdev
->rs_same_csum
+= e
->size
>> BM_BLOCK_SHIFT
;
985 ok
= drbd_send_ack(mdev
, P_RS_IS_IN_SYNC
, e
);
987 inc_rs_pending(mdev
);
988 e
->block_id
= ID_SYNCER
;
989 ok
= drbd_send_block(mdev
, P_RS_DATA_REPLY
, e
);
992 ok
= drbd_send_ack(mdev
, P_NEG_RS_DREPLY
, e
);
993 if (__ratelimit(&drbd_ratelimit_state
))
994 dev_err(DEV
, "Sending NegDReply. I guess it gets messy.\n");
1001 move_to_net_ee_or_free(mdev
, e
);
1004 dev_err(DEV
, "drbd_send_block/ack() failed\n");
/* w_e_end_ov_req() - worker callback answering an online-verify request.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_epoch_entry.
 * @cancel: cancel indication passed to every work callback.
 *
 * Computes the digest of the locally read block with mdev->verify_tfm and
 * sends it to the peer as P_OV_REPLY via drbd_send_drequest_csum(); the
 * entry is freed with drbd_free_ee().
 *
 * NOTE(review): the declarations of digest, digest_size and ok, the
 * targets of the two early-exit tests, the check of the kmalloc() result,
 * the condition guarding dec_rs_pending(), the freeing of digest and the
 * return statement are not visible in this listing. */
1008 int w_e_end_ov_req(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1010 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
1015 if (unlikely(cancel
))
1018 if (unlikely((e
->flags
& EE_WAS_ERROR
) != 0))
1021 digest_size
= crypto_hash_digestsize(mdev
->verify_tfm
);
1022 /* FIXME if this allocation fails, online verify will not terminate! */
1023 digest
= kmalloc(digest_size
, GFP_NOIO
);
1025 drbd_csum_ee(mdev
, mdev
->verify_tfm
, e
, digest
);
1026 inc_rs_pending(mdev
);
1027 ok
= drbd_send_drequest_csum(mdev
, e
->sector
, e
->size
,
1028 digest
, digest_size
, P_OV_REPLY
);
1030 dec_rs_pending(mdev
);
1035 drbd_free_ee(mdev
, e
);
1042 void drbd_ov_oos_found(struct drbd_conf
*mdev
, sector_t sector
, int size
)
1044 if (mdev
->ov_last_oos_start
+ mdev
->ov_last_oos_size
== sector
) {
1045 mdev
->ov_last_oos_size
+= size
>>9;
1047 mdev
->ov_last_oos_start
= sector
;
1048 mdev
->ov_last_oos_size
= size
>>9;
1050 drbd_set_out_of_sync(mdev
, sector
, size
);
1051 set_bit(WRITE_BM_AFTER_RESYNC
, &mdev
->flags
);
/* w_e_end_ov_reply() - worker callback evaluating the peer's verify digest.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_epoch_entry.
 * @cancel: cancel indication; when set the entry is freed.
 *
 * e->block_id carries a pointer to the struct digest_info received from the
 * peer. After a successful local read the local digest (mdev->verify_tfm)
 * is compared with the peer's; a failed read is answered with
 * P_NEG_RS_DREPLY. drbd_ov_oos_found() records differing ranges, and the
 * outcome is sent as P_OV_RESULT with ID_IN_SYNC or ID_OUT_OF_SYNC.
 * When mdev->ov_left drops to zero, drbd_resync_finished() is called.
 *
 * NOTE(review): the declarations of digest, digest_size, eq and ok, the
 * check of the kmalloc() result, the condition around drbd_ov_oos_found(),
 * the freeing of digest and di, and the return statements are not visible
 * in this listing -- confirm against the complete file. */
1054 int w_e_end_ov_reply(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1056 struct drbd_epoch_entry
*e
= container_of(w
, struct drbd_epoch_entry
, w
);
1057 struct digest_info
*di
;
1062 if (unlikely(cancel
)) {
1063 drbd_free_ee(mdev
, e
);
1068 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1069 * the resync lru has been cleaned up already */
1070 drbd_rs_complete_io(mdev
, e
->sector
);
1072 di
= (struct digest_info
*)(unsigned long)e
->block_id
;
1074 if (likely((e
->flags
& EE_WAS_ERROR
) == 0)) {
1075 digest_size
= crypto_hash_digestsize(mdev
->verify_tfm
);
1076 digest
= kmalloc(digest_size
, GFP_NOIO
);
1078 drbd_csum_ee(mdev
, mdev
->verify_tfm
, e
, digest
);
1080 D_ASSERT(digest_size
== di
->digest_size
);
1081 eq
= !memcmp(digest
, di
->digest
, digest_size
);
1085 ok
= drbd_send_ack(mdev
, P_NEG_RS_DREPLY
, e
);
1086 if (__ratelimit(&drbd_ratelimit_state
))
1087 dev_err(DEV
, "Sending NegDReply. I guess it gets messy.\n");
1095 drbd_ov_oos_found(mdev
, e
->sector
, e
->size
);
1099 ok
= drbd_send_ack_ex(mdev
, P_OV_RESULT
, e
->sector
, e
->size
,
1100 eq
? ID_IN_SYNC
: ID_OUT_OF_SYNC
);
1102 drbd_free_ee(mdev
, e
);
/* last outstanding verify block: finish the run */
1104 if (--mdev
->ov_left
== 0) {
1106 drbd_resync_finished(mdev
);
/* w_prev_work_done() - worker callback attached to a struct drbd_wq_barrier.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_wq_barrier.
 * @cancel: cancel indication passed to every work callback.
 *
 * NOTE(review): only the recovery of the barrier object from @w is visible
 * in this listing; what is done with b and the return value must be
 * confirmed against the complete file. */
1112 int w_prev_work_done(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1114 struct drbd_wq_barrier
*b
= container_of(w
, struct drbd_wq_barrier
, w
);
/* w_send_barrier() - worker callback sending a P_BARRIER packet.
 * @mdev:   DRBD device.
 * @w:      work object embedded in a struct drbd_tl_epoch.
 * @cancel: cancel indication passed to every work callback.
 *
 * Under mdev->req_lock it re-checks that @w still points at this callback
 * and that the connection is at least C_CONNECTED. It then takes the data
 * socket, stores the epoch's br_number in the preallocated barrier packet
 * (mdev->data.sbuf.barrier) and sends it with _drbd_send_cmd().
 *
 * NOTE(review): the declaration of ok, the statement executed when the
 * re-check fails, the early return when the data socket is unavailable and
 * the final return are not visible in this listing. */
1119 int w_send_barrier(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1121 struct drbd_tl_epoch
*b
= container_of(w
, struct drbd_tl_epoch
, w
);
1122 struct p_barrier
*p
= &mdev
->data
.sbuf
.barrier
;
1125 /* really avoid racing with tl_clear. w.cb may have been referenced
1126 * just before it was reassigned and re-queued, so double check that.
1127 * actually, this race was harmless, since we only try to send the
1128 * barrier packet here, and otherwise do nothing with the object.
1129 * but compare with the head of w_clear_epoch */
1130 spin_lock_irq(&mdev
->req_lock
);
1131 if (w
->cb
!= w_send_barrier
|| mdev
->state
.conn
< C_CONNECTED
)
1133 spin_unlock_irq(&mdev
->req_lock
);
1137 if (!drbd_get_data_sock(mdev
))
1139 p
->barrier
= b
->br_number
;
1140 /* inc_ap_pending was done where this was queued.
1141 * dec_ap_pending will be done in got_BarrierAck
1142 * or (on connection loss) in w_clear_epoch. */
1143 ok
= _drbd_send_cmd(mdev
, mdev
->data
.socket
, P_BARRIER
,
1144 (struct p_header
*)p
, sizeof(*p
), 0);
1145 drbd_put_data_sock(mdev
);
/* w_send_write_hint() - worker callback sending P_UNPLUG_REMOTE to the peer.
 * @mdev:   DRBD device.
 * @w:      work object (unused in the visible code).
 * @cancel: cancel indication passed to every work callback.
 *
 * Returns the result of drbd_send_short_cmd().
 * NOTE(review): the embedded numbering shows lines missing between the
 * signature and the return -- confirm against the complete file how
 * @cancel is handled. */
1150 int w_send_write_hint(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1154 return drbd_send_short_cmd(mdev
, P_UNPLUG_REMOTE
);
1158 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1159 * @mdev: DRBD device.
1161 * @cancel: The connection will be closed anyways
/* Worker callback mirroring a write request to the peer.
 * With @cancel set the request gets the send_canceled event. Otherwise the
 * data block is sent with drbd_send_dblock() and the request receives
 * handed_over_to_network on success or send_failed on failure.
 * NOTE(review): the declaration of ok and the return statements are not
 * visible in this listing -- confirm against the complete file. */
1163 int w_send_dblock(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1165 struct drbd_request
*req
= container_of(w
, struct drbd_request
, w
);
1168 if (unlikely(cancel
)) {
1169 req_mod(req
, send_canceled
);
1173 ok
= drbd_send_dblock(mdev
, req
);
1174 req_mod(req
, ok
? handed_over_to_network
: send_failed
);
1180 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1181 * @mdev: DRBD device.
1183 * @cancel: The connection will be closed anyways
/* Worker callback sending a read request for a drbd_request to the peer.
 * With @cancel set the request gets the send_canceled event. Otherwise a
 * P_DATA_REQUEST for req->sector/req->size is sent, with the request's
 * address as block id. The request then receives handed_over_to_network
 * on success or send_failed on failure.
 * NOTE(review): the declaration of ok, the condition under which the
 * connection is forced to C_NETWORK_FAILURE (presumably !ok) and the
 * return statements are not visible in this listing. */
1185 int w_send_read_req(struct drbd_conf
*mdev
, struct drbd_work
*w
, int cancel
)
1187 struct drbd_request
*req
= container_of(w
, struct drbd_request
, w
);
1190 if (unlikely(cancel
)) {
1191 req_mod(req
, send_canceled
);
1195 ok
= drbd_send_drequest(mdev
, P_DATA_REQUEST
, req
->sector
, req
->size
,
1196 (unsigned long)req
);
1199 /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
1200 * so this is probably redundant */
1201 if (mdev
->state
.conn
>= C_CONNECTED
)
1202 drbd_force_state(mdev
, NS(conn
, C_NETWORK_FAILURE
));
1204 req_mod(req
, ok
? handed_over_to_network
: send_failed
);
/* _drbd_may_sync_now() - check the sync-after dependency chain of a device.
 * @mdev: DRBD device to check.
 *
 * Follows sync_conf.after from device to device. A value of -1 ends the
 * chain. A device on the chain that is itself in a state between
 * C_SYNC_SOURCE and C_PAUSED_SYNC_T, or has one of aftr_isp, peer_isp or
 * user_isp set, is a reason not to sync now.
 *
 * NOTE(review): the enclosing loop and the return statements are not
 * visible in this listing; presumably 1 means "may sync" (as in the
 * ERR_IF line) and 0 means "must wait" -- confirm against the complete
 * file. */
1209 static int _drbd_may_sync_now(struct drbd_conf
*mdev
)
1211 struct drbd_conf
*odev
= mdev
;
1214 if (odev
->sync_conf
.after
== -1)
1216 odev
= minor_to_mdev(odev
->sync_conf
.after
);
1217 ERR_IF(!odev
) return 1;
1218 if ((odev
->state
.conn
>= C_SYNC_SOURCE
&&
1219 odev
->state
.conn
<= C_PAUSED_SYNC_T
) ||
1220 odev
->state
.aftr_isp
|| odev
->state
.peer_isp
||
1221 odev
->state
.user_isp
)
1227 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1228 * @mdev: DRBD device.
1230 * Called from process context only (admin command and after_state_ch).
/* Walks all minors and sets aftr_isp on every device for which
 * _drbd_may_sync_now() reports false; devices that are both C_STANDALONE
 * and D_DISKLESS are tested separately first. rv collects whether any
 * __drbd_set_state() call returned something other than SS_NOTHING_TO_DO.
 * NOTE(review): the declarations of i and rv, the NULL check on odev, the
 * statement following the standalone/diskless test (presumably continue)
 * and the return are not visible in this listing. */
1232 static int _drbd_pause_after(struct drbd_conf
*mdev
)
1234 struct drbd_conf
*odev
;
1237 for (i
= 0; i
< minor_count
; i
++) {
1238 odev
= minor_to_mdev(i
);
1241 if (odev
->state
.conn
== C_STANDALONE
&& odev
->state
.disk
== D_DISKLESS
)
1243 if (!_drbd_may_sync_now(odev
))
1244 rv
|= (__drbd_set_state(_NS(odev
, aftr_isp
, 1), CS_HARD
, NULL
)
1245 != SS_NOTHING_TO_DO
);
1252 * _drbd_resume_next() - Resume resync on all devices that may resync now
1253 * @mdev: DRBD device.
1255 * Called from process context only (admin command and worker).
/* Walks all minors and clears aftr_isp on every device that currently has
 * it set and for which _drbd_may_sync_now() reports true; devices that are
 * both C_STANDALONE and D_DISKLESS are tested separately first. rv collects
 * whether any __drbd_set_state() call returned something other than
 * SS_NOTHING_TO_DO.
 * NOTE(review): the declarations of i and rv, the NULL check on odev, the
 * statement following the standalone/diskless test (presumably continue),
 * the remaining arguments of __drbd_set_state() (original line 1271) and
 * the return are not visible in this listing. */
1257 static int _drbd_resume_next(struct drbd_conf
*mdev
)
1259 struct drbd_conf
*odev
;
1262 for (i
= 0; i
< minor_count
; i
++) {
1263 odev
= minor_to_mdev(i
);
1266 if (odev
->state
.conn
== C_STANDALONE
&& odev
->state
.disk
== D_DISKLESS
)
1268 if (odev
->state
.aftr_isp
) {
1269 if (_drbd_may_sync_now(odev
))
1270 rv
|= (__drbd_set_state(_NS(odev
, aftr_isp
, 0),
1272 != SS_NOTHING_TO_DO
) ;
1278 void resume_next_sg(struct drbd_conf
*mdev
)
1280 write_lock_irq(&global_state_lock
);
1281 _drbd_resume_next(mdev
);
1282 write_unlock_irq(&global_state_lock
);
1285 void suspend_other_sg(struct drbd_conf
*mdev
)
1287 write_lock_irq(&global_state_lock
);
1288 _drbd_pause_after(mdev
);
1289 write_unlock_irq(&global_state_lock
);
/*
 * sync_after_error() - validate a proposed sync-after minor for @mdev
 * @mdev:    device that is to receive the new sync-after setting
 * @o_minor: proposed minor number
 *
 * Visible logic: ERR_SYNC_AFTER is returned when @o_minor is below -1 or does
 * not resolve to a device.  Otherwise the chain of sync_conf.after references
 * is followed, starting at the device for @o_minor; ERR_SYNC_AFTER_CYCLE is
 * the outcome of the loop check, and a reference of -1 ends the chain.
 *
 * NOTE(review): the fused line numbers show gaps, notably 1296-1297,
 * 1303-1304 and 1309.  The handling of @o_minor == -1, the loop header, the
 * test that detects a cycle and the result for a chain that ends are not
 * visible; confirm them against a complete copy of the file.
 */
1292 static int sync_after_error(struct drbd_conf
*mdev
, int o_minor
)
1294 struct drbd_conf
*odev
;
1298 if (o_minor
< -1 || minor_to_mdev(o_minor
) == NULL
)
1299 return ERR_SYNC_AFTER
;
1301 /* check for loops */
1302 odev
= minor_to_mdev(o_minor
);
/* NOTE(review): lines 1303-1304 are missing; @mdev is never referenced in
 * the visible lines, so presumably the dropped test compares the walked
 * device with @mdev -- confirm. */
1305 return ERR_SYNC_AFTER_CYCLE
;
1307 /* dependency chain ends here, no cycles. */
1308 if (odev
->sync_conf
.after
== -1)
/* NOTE(review): line 1309, the statement guarded by this test, is missing
 * -- presumably the success result (drbd_alter_sa() compares against
 * NO_ERROR); confirm. */
1311 /* follow the dependency chain */
1312 odev
= minor_to_mdev(odev
->sync_conf
.after
);
/*
 * drbd_alter_sa() - change the sync-after setting of a device
 * @mdev: device to reconfigure
 * @na:   new value for sync_conf.after
 *
 * With global_state_lock write-locked, @na is validated by
 * sync_after_error().  Only when that yields NO_ERROR is the value stored in
 * mdev->sync_conf.after, after which _drbd_pause_after() and
 * _drbd_resume_next() are run and their results combined in changes.
 *
 * NOTE(review): the fused line numbers show gaps at 1317-1320, 1325,
 * 1328-1329 and after 1330.  The declarations of changes and retcode,
 * whatever construct encloses the pause/resume pair (changes is computed but
 * its use is not visible -- presumably the pair repeats until nothing
 * changes) and the return statement are missing; confirm.
 */
1316 int drbd_alter_sa(struct drbd_conf
*mdev
, int na
)
1321 write_lock_irq(&global_state_lock
);
1322 retcode
= sync_after_error(mdev
, na
);
1323 if (retcode
== NO_ERROR
) {
1324 mdev
->sync_conf
.after
= na
;
/* Re-evaluate which devices have to pause and which may resume under the
 * new dependency. */
1326 changes
= _drbd_pause_after(mdev
);
1327 changes
|= _drbd_resume_next(mdev
);
1330 write_unlock_irq(&global_state_lock
);
/*
 * ping_peer() - wait for a ping acknowledgement or for loss of the connection
 * @mdev: DRBD device
 *
 * Clears GOT_PING_ACK in mdev->flags, then sleeps on mdev->misc_wait until
 * either that bit is set again or the connection state drops below
 * C_CONNECTED.
 *
 * NOTE(review): line 1337, between clearing the flag and waiting, is missing
 * from this listing -- presumably the call that triggers the ping; confirm.
 */
1334 static void ping_peer(struct drbd_conf
*mdev
)
1336 clear_bit(GOT_PING_ACK
, &mdev
->flags
);
1338 wait_event(mdev
->misc_wait
,
1339 test_bit(GOT_PING_ACK
, &mdev
->flags
) || mdev
->state
.conn
< C_CONNECTED
);
/**
 * drbd_start_resync() - Start the resync process
 * @mdev: DRBD device.
 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
/*
 * Outline of the visible steps:
 *  1. Complain if the connection state is already C_SYNC_SOURCE or above.
 *  2. Cancel leftovers of an earlier resync run (drbd_rs_cancel_all()).
 *  3. For C_SYNC_TARGET, run the before-resync-target helper; its result can
 *     lead to a forced C_DISCONNECTING.
 *  4. Under drbd_state_lock(), and with a local-disk reference obtained via
 *     get_ldev_if_state(), prepare the side-specific data: reset
 *     bm_resync_fo as target, or generate, store and send a random bitmap
 *     UUID as source.
 *  5. Under the global_state_lock write lock, build the new state in ns,
 *     apply it with __drbd_set_state(), and on success reset the resync
 *     bookkeeping and pause dependent devices.
 *  6. On success, log the amount to sync, finish at once when nothing is to
 *     be synced, and kick the resync timer when the state is C_SYNC_TARGET.
 *
 * NOTE(review): this listing carries fused line numbers and has lost many
 * single lines (among them 1353, 1357, 1369, 1373, 1381, 1387, 1397, 1401,
 * 1409, 1415, 1419, 1425, 1435 and 1446).  The individual gaps are flagged
 * below; confirm each against a complete copy of the file.
 */
1350 void drbd_start_resync(struct drbd_conf
*mdev
, enum drbd_conns side
)
1352 union drbd_state ns
;
/* NOTE(review): line 1353 is missing -- presumably the declaration of r,
 * which is used below; confirm. */
1355 if (mdev
->state
.conn
>= C_SYNC_SOURCE
) {
1356 dev_err(DEV
, "Resync already running!\n");
/* NOTE(review): line 1357 is missing -- presumably an early return. */
1360 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1361 drbd_rs_cancel_all(mdev
);
1363 if (side
== C_SYNC_TARGET
) {
1364 /* Since application IO was locked out during C_WF_BITMAP_T and
1365 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1366 we check that we might make the data inconsistent. */
1367 r
= drbd_khelper(mdev
, "before-resync-target");
/* Keep only bits 8..15 of the helper result. */
1368 r
= (r
>> 8) & 0xff;
/* NOTE(review): line 1369, the test guarding the report below, is missing. */
1370 dev_info(DEV
, "before-resync-target handler returned %d, "
1371 "dropping connection.\n", r
);
1372 drbd_force_state(mdev
, NS(conn
, C_DISCONNECTING
));
/* NOTE(review): line 1373 is missing -- presumably an early return. */
1377 drbd_state_lock(mdev
);
1379 if (!get_ldev_if_state(mdev
, D_NEGOTIATING
)) {
1380 drbd_state_unlock(mdev
);
/* NOTE(review): line 1381 is missing -- presumably an early return, since
 * no local-disk reference was obtained. */
1384 if (side
== C_SYNC_TARGET
) {
1385 mdev
->bm_resync_fo
= 0;
1386 } else /* side == C_SYNC_SOURCE */ {
/* NOTE(review): line 1387 is missing -- presumably the declaration of uuid. */
/* Create a random value, store it as the UI_BITMAP UUID and send it to the
 * peer. */
1389 get_random_bytes(&uuid
, sizeof(u64
));
1390 drbd_uuid_set(mdev
, UI_BITMAP
, uuid
);
1391 drbd_send_sync_uuid(mdev
, uuid
);
1393 D_ASSERT(mdev
->state
.disk
== D_UP_TO_DATE
);
1396 write_lock_irq(&global_state_lock
);
/* NOTE(review): line 1397 is missing; ns is modified below without a
 * visible initialisation -- presumably it is seeded from mdev->state. */
1399 ns
.aftr_isp
= !_drbd_may_sync_now(mdev
);
/* NOTE(review): line 1401 is missing -- presumably ns.conn is set from
 * @side there, as ns.conn is relied upon further down. */
1403 if (side
== C_SYNC_TARGET
)
1404 ns
.disk
= D_INCONSISTENT
;
1405 else /* side == C_SYNC_SOURCE */
1406 ns
.pdsk
= D_INCONSISTENT
;
1408 r
= __drbd_set_state(mdev
, ns
, CS_VERBOSE
, NULL
);
/* NOTE(review): line 1409 is missing -- presumably ns is re-read from the
 * device before the connection check below. */
1411 if (ns
.conn
< C_CONNECTED
)
1412 r
= SS_UNKNOWN_ERROR
;
1414 if (r
== SS_SUCCESS
) {
/* NOTE(review): line 1415 is missing; rs_total is read further down but
 * never assigned in the visible lines -- presumably it is set here together
 * with rs_mark_left. */
1416 mdev
->rs_mark_left
= drbd_bm_total_weight(mdev
);
1417 mdev
->rs_failed
= 0;
1418 mdev
->rs_paused
= 0;
/* NOTE(review): line 1419 is missing -- presumably another timestamp field
 * assigned together with rs_mark_time. */
1420 mdev
->rs_mark_time
= jiffies
;
1421 mdev
->rs_same_csum
= 0;
1422 _drbd_pause_after(mdev
);
1424 write_unlock_irq(&global_state_lock
);
/* NOTE(review): line 1425 is missing -- presumably the release of the
 * local-disk reference taken by get_ldev_if_state() above; confirm. */
1427 if (r
== SS_SUCCESS
) {
1428 dev_info(DEV
, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1429 drbd_conn_str(ns
.conn
),
1430 (unsigned long) mdev
->rs_total
<< (BM_BLOCK_SHIFT
-10),
1431 (unsigned long) mdev
->rs_total
);
1433 if (mdev
->rs_total
== 0) {
1434 /* Peer still reachable? Beware of failing before-resync-target handlers! */
/* NOTE(review): line 1435 is missing -- presumably the ping_peer() call
 * that the comment above refers to. */
1436 drbd_resync_finished(mdev
);
1439 /* ns.conn may already be != mdev->state.conn,
1440 * we may have been paused in between, or become paused until
1441 * the timer triggers.
1442 * No matter, that is handled in resync_timer_fn() */
1443 if (ns
.conn
== C_SYNC_TARGET
)
1444 mod_timer(&mdev
->resync_timer
, jiffies
);
/* NOTE(review): line 1446 is missing; its content cannot be inferred from
 * the visible lines. */
1448 drbd_state_unlock(mdev
);
/*
 * drbd_worker() - main function of the per-device worker thread
 * @thi: thread descriptor; thi->mdev is the device being served
 *
 * Outline of the visible steps:
 *  - Name the task drbd<minor>_worker.
 *  - While the thread state is Running: obtain the work semaphore
 *    (data.work.s), take the first entry off data.work.q under q_lock and
 *    invoke its callback; the third callback argument is non-zero when the
 *    connection state is below C_CONNECTED.  A failing callback on a
 *    connected device forces C_NETWORK_FAILURE.
 *  - Afterwards: drain the remaining queue, reset the semaphore to zero,
 *    stop the receiver thread, clean up the device, clear DEVICE_DYING and
 *    CONFIG_PENDING and wake waiters on state_wait.
 *
 * NOTE(review): this listing carries fused line numbers and has lost many
 * lines, including the declarations of intr and i, several guarded
 * statements, closing braces and the end of the function.  Two comments
 * (starting at lines 1487 and 1538) have lost their closing delimiter, so a
 * compiler would read the code that follows them as comment text.  Confirm
 * everything flagged below against a complete copy of the file.
 */
1451 int drbd_worker(struct drbd_thread
*thi
)
1453 struct drbd_conf
*mdev
= thi
->mdev
;
1454 struct drbd_work
*w
= NULL
;
1455 LIST_HEAD(work_list
);
/* NOTE(review): line 1456 is missing -- presumably the declarations of intr
 * and i, both used below. */
1458 sprintf(current
->comm
, "drbd%d_worker", mdev_to_minor(mdev
));
1460 while (get_t_state(thi
) == Running
) {
1461 drbd_thread_current_set_cpu(mdev
);
/* No work could be taken without blocking: uncork the data socket (unless
 * no_cork is set) before sleeping on the semaphore, and cork it again once
 * the wait is over. */
1463 if (down_trylock(&mdev
->data
.work
.s
)) {
1464 mutex_lock(&mdev
->data
.mutex
);
1465 if (mdev
->data
.socket
&& !mdev
->net_conf
->no_cork
)
1466 drbd_tcp_uncork(mdev
->data
.socket
);
1467 mutex_unlock(&mdev
->data
.mutex
);
1469 intr
= down_interruptible(&mdev
->data
.work
.s
);
1471 mutex_lock(&mdev
->data
.mutex
);
1472 if (mdev
->data
.socket
&& !mdev
->net_conf
->no_cork
)
1473 drbd_tcp_cork(mdev
->data
.socket
);
1474 mutex_unlock(&mdev
->data
.mutex
);
/* NOTE(review): lines 1475-1477 are missing; the lines below handle an
 * interrupted wait and are presumably guarded by a test of intr. */
1478 D_ASSERT(intr
== -EINTR
);
1479 flush_signals(current
);
1480 ERR_IF (get_t_state(thi
) == Running
)
/* NOTE(review): lines 1481-1484 are missing; what happens after the signal
 * was flushed (retry or leave the loop) is not visible. */
1485 if (get_t_state(thi
) != Running
)
/* NOTE(review): line 1486 is missing; the comment that follows speaks of
 * this break, so the dropped statement is presumably a break. */
1487 /* With this break, we have done a down() but not consumed
1488 the entry from the list. The cleanup code takes care of
1492 spin_lock_irq(&mdev
->data
.work
.q_lock
);
1493 ERR_IF(list_empty(&mdev
->data
.work
.q
)) {
1494 /* something terribly wrong in our logic.
1495 * we were able to down() the semaphore,
1496 * but the list is empty... doh.
1498 * what is the best thing to do now?
1499 * try again from scratch, restarting the receiver,
1500 * asender, whatnot? could break even more ugly,
1501 * e.g. when we are primary, but no good local data.
1503 * I'll try to get away just starting over this loop.
1505 spin_unlock_irq(&mdev
->data
.work
.q_lock
);
1508 w
= list_entry(mdev
->data
.work
.q
.next
, struct drbd_work
, list
);
1509 list_del_init(&w
->list
);
1510 spin_unlock_irq(&mdev
->data
.work
.q_lock
);
1512 if (!w
->cb(mdev
, w
, mdev
->state
.conn
< C_CONNECTED
)) {
1513 /* dev_warn(DEV, "worker: a callback failed! \n"); */
1514 if (mdev
->state
.conn
>= C_CONNECTED
)
1515 drbd_force_state(mdev
,
1516 NS(conn
, C_NETWORK_FAILURE
));
/* NOTE(review): lines 1517-1518 are missing -- presumably the closing
 * braces of the callback check and of the main loop; what follows looks
 * like the shutdown path. */
1519 D_ASSERT(test_bit(DEVICE_DYING
, &mdev
->flags
));
1520 D_ASSERT(test_bit(CONFIG_PENDING
, &mdev
->flags
));
/* Drain whatever is still queued: move the queue onto the private
 * work_list under q_lock, then unlink the entries one by one with the lock
 * dropped, and re-check the queue with the lock held again. */
1522 spin_lock_irq(&mdev
->data
.work
.q_lock
);
1524 while (!list_empty(&mdev
->data
.work
.q
)) {
1525 list_splice_init(&mdev
->data
.work
.q
, &work_list
);
1526 spin_unlock_irq(&mdev
->data
.work
.q_lock
);
1528 while (!list_empty(&work_list
)) {
1529 w
= list_entry(work_list
.next
, struct drbd_work
, list
);
1530 list_del_init(&w
->list
);
/* NOTE(review): line 1531 is missing; what is done with each drained entry
 * is not visible -- presumably its callback is invoked in cancel mode. */
1532 i
++; /* dead debugging code */
1535 spin_lock_irq(&mdev
->data
.work
.q_lock
);
/* Reset the work semaphore count to zero while still holding q_lock. */
1537 sema_init(&mdev
->data
.work
.s
, 0);
1538 /* DANGEROUS race: if someone did queue his work within the spinlock,
1539 * but up() ed outside the spinlock, we could get an up() on the
1540 * semaphore without corresponding list entry.
1543 spin_unlock_irq(&mdev
->data
.work
.q_lock
);
1545 D_ASSERT(mdev
->state
.disk
== D_DISKLESS
&& mdev
->state
.conn
== C_STANDALONE
);
1546 /* _drbd_set_state only uses stop_nowait.
1547 * wait here for the Exiting receiver. */
1548 drbd_thread_stop(&mdev
->receiver
);
1549 drbd_mdev_cleanup(mdev
);
1551 dev_info(DEV
, "worker terminated\n");
/* Signal that teardown is complete and wake anyone sleeping on state_wait. */
1553 clear_bit(DEVICE_DYING
, &mdev
->flags
);
1554 clear_bit(CONFIG_PENDING
, &mdev
->flags
);
1555 wake_up(&mdev
->state_wait
);
/* NOTE(review): the return statement and the closing brace of this int
 * function are not visible in the listing. */