drbd: Fix a potential race that could cause data inconsistency
drivers/block/drbd/drbd_receiver.c
1/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
31#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
39#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
44#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
47#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
51struct packet_info {
52 enum drbd_packet cmd;
53 unsigned int size;
54 unsigned int vnr;
e658983a 55 void *data;
56};
57
58enum finish_epoch {
59 FE_STILL_LIVE,
60 FE_DESTROYED,
61 FE_RECYCLED,
62};
63
6038178e 64static int drbd_do_features(struct drbd_tconn *tconn);
13e6037d 65static int drbd_do_auth(struct drbd_tconn *tconn);
c141ebda 66static int drbd_disconnected(struct drbd_conf *mdev);
b411b363 67
1e9dd291 68static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
99920dc5 69static int e_end_block(struct drbd_work *, int);
b411b363 70
71
72#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
73
74/*
75 * some helper functions to deal with single linked page lists,
76 * page->private being our "next" pointer.
77 */
78
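/* Illustration (not part of the original source): a chain of three pages
 * A -> B -> C would be built like
 *	set_page_private(A, (unsigned long)B);
 *	set_page_private(B, (unsigned long)C);
 *	set_page_private(C, 0);
 * where 0 terminates the chain; page_chain_next() (defined elsewhere in the
 * DRBD headers) is assumed to simply return (struct page *)page_private(page).
 * page_chain_del(), page_chain_tail() and page_chain_add() below all walk or
 * splice such chains.
 */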
79/* If at least n pages are linked at head, get n pages off.
80 * Otherwise, don't modify head, and return NULL.
81 * Locking is the responsibility of the caller.
82 */
83static struct page *page_chain_del(struct page **head, int n)
84{
85 struct page *page;
86 struct page *tmp;
87
88 BUG_ON(!n);
89 BUG_ON(!head);
90
91 page = *head;
92
93 if (!page)
94 return NULL;
95
96 while (page) {
97 tmp = page_chain_next(page);
98 if (--n == 0)
99 break; /* found sufficient pages */
100 if (tmp == NULL)
101 /* insufficient pages, don't use any of them. */
102 return NULL;
103 page = tmp;
104 }
105
106 /* add end of list marker for the returned list */
107 set_page_private(page, 0);
108 /* actual return value, and adjustment of head */
109 page = *head;
110 *head = tmp;
111 return page;
112}
113
114/* may be used outside of locks to find the tail of a (usually short)
115 * "private" page chain, before adding it back to a global chain head
116 * with page_chain_add() under a spinlock. */
117static struct page *page_chain_tail(struct page *page, int *len)
118{
119 struct page *tmp;
120 int i = 1;
121 while ((tmp = page_chain_next(page)))
122 ++i, page = tmp;
123 if (len)
124 *len = i;
125 return page;
126}
127
128static int page_chain_free(struct page *page)
129{
130 struct page *tmp;
131 int i = 0;
132 page_chain_for_each_safe(page, tmp) {
133 put_page(page);
134 ++i;
135 }
136 return i;
137}
138
139static void page_chain_add(struct page **head,
140 struct page *chain_first, struct page *chain_last)
141{
142#if 1
143 struct page *tmp;
144 tmp = page_chain_tail(chain_first, NULL);
145 BUG_ON(tmp != chain_last);
146#endif
147
148 /* add chain to head */
149 set_page_private(chain_last, (unsigned long)*head);
150 *head = chain_first;
151}
152
153static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
154 unsigned int number)
155{
156 struct page *page = NULL;
45bb912b 157 struct page *tmp = NULL;
18c2d522 158 unsigned int i = 0;
159
160 /* Yes, testing drbd_pp_vacant outside the lock is racy.
161 * So what. It saves a spin_lock. */
45bb912b 162 if (drbd_pp_vacant >= number) {
b411b363 163 spin_lock(&drbd_pp_lock);
164 page = page_chain_del(&drbd_pp_pool, number);
165 if (page)
166 drbd_pp_vacant -= number;
b411b363 167 spin_unlock(&drbd_pp_lock);
168 if (page)
169 return page;
b411b363 170 }
45bb912b 171
172 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
173 * "criss-cross" setup, that might cause write-out on some other DRBD,
174 * which in turn might block on the other node at this very place. */
175 for (i = 0; i < number; i++) {
176 tmp = alloc_page(GFP_TRY);
177 if (!tmp)
178 break;
179 set_page_private(tmp, (unsigned long)page);
180 page = tmp;
181 }
182
183 if (i == number)
184 return page;
185
186 /* Not enough pages immediately available this time.
c37c8ecf 187 * No need to jump around here, drbd_alloc_pages will retry this
188 * function "soon". */
189 if (page) {
190 tmp = page_chain_tail(page, NULL);
191 spin_lock(&drbd_pp_lock);
192 page_chain_add(&drbd_pp_pool, page, tmp);
193 drbd_pp_vacant += i;
194 spin_unlock(&drbd_pp_lock);
195 }
196 return NULL;
197}
198
199static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
200 struct list_head *to_be_freed)
b411b363 201{
db830c46 202 struct drbd_peer_request *peer_req;
203 struct list_head *le, *tle;
204
205 /* The EEs are always appended to the end of the list. Since
206 they are sent in order over the wire, they have to finish
 207 in order. As soon as we see the first one that is not finished, we can
 208 stop examining the list... */
209
210 list_for_each_safe(le, tle, &mdev->net_ee) {
db830c46 211 peer_req = list_entry(le, struct drbd_peer_request, w.list);
045417f7 212 if (drbd_peer_req_has_active_page(peer_req))
213 break;
214 list_move(le, to_be_freed);
215 }
216}
217
218static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
219{
220 LIST_HEAD(reclaimed);
db830c46 221 struct drbd_peer_request *peer_req, *t;
b411b363 222
87eeee41 223 spin_lock_irq(&mdev->tconn->req_lock);
a990be46 224 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
87eeee41 225 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 226
db830c46 227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
3967deb1 228 drbd_free_net_peer_req(mdev, peer_req);
229}
230
231/**
c37c8ecf 232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
b411b363 233 * @mdev: DRBD device.
234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
236 *
237 * Tries to allocate number pages, first from our own page pool, then from
238 * the kernel, unless this allocation would exceed the max_buffers setting.
239 * Possibly retry until DRBD frees sufficient pages somewhere else.
b411b363 240 *
45bb912b 241 * Returns a page chain linked via page->private.
b411b363 242 */
243struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
244 bool retry)
245{
246 struct page *page = NULL;
44ed167d 247 struct net_conf *nc;
b411b363 248 DEFINE_WAIT(wait);
44ed167d 249 int mxb;
b411b363 250
251 /* Yes, we may run up to @number over max_buffers. If we
252 * follow it strictly, the admin will get it wrong anyways. */
253 rcu_read_lock();
254 nc = rcu_dereference(mdev->tconn->net_conf);
255 mxb = nc ? nc->max_buffers : 1000000;
256 rcu_read_unlock();
257
258 if (atomic_read(&mdev->pp_in_use) < mxb)
18c2d522 259 page = __drbd_alloc_pages(mdev, number);
b411b363 260
45bb912b 261 while (page == NULL) {
262 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
263
264 drbd_kick_lo_and_reclaim_net(mdev);
265
44ed167d 266 if (atomic_read(&mdev->pp_in_use) < mxb) {
18c2d522 267 page = __drbd_alloc_pages(mdev, number);
268 if (page)
269 break;
270 }
271
272 if (!retry)
273 break;
274
275 if (signal_pending(current)) {
c37c8ecf 276 dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
277 break;
278 }
279
280 schedule();
281 }
282 finish_wait(&drbd_pp_wait, &wait);
283
284 if (page)
285 atomic_add(number, &mdev->pp_in_use);
286 return page;
287}
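/* Usage sketch (illustrative only, not taken from the original source):
 * a receiver path that needs four pages and may sleep would do
 *	struct page *chain = drbd_alloc_pages(mdev, 4, true);
 *	if (chain)
 *		drbd_free_pages(mdev, chain, 0);
 * drbd_free_pages() below is the counterpart that links the chain back into
 * the global pool (or returns it to the system) and wakes up other waiters.
 */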
288
c37c8ecf 289/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
87eeee41 290 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
291 * Either links the page chain back to the global pool,
292 * or returns all pages to the system. */
5cc287e0 293static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
b411b363 294{
435f0740 295 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
b411b363 296 int i;
435f0740 297
81a5d60e 298 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
299 i = page_chain_free(page);
300 else {
301 struct page *tmp;
302 tmp = page_chain_tail(page, &i);
303 spin_lock(&drbd_pp_lock);
304 page_chain_add(&drbd_pp_pool, page, tmp);
305 drbd_pp_vacant += i;
306 spin_unlock(&drbd_pp_lock);
b411b363 307 }
435f0740 308 i = atomic_sub_return(i, a);
45bb912b 309 if (i < 0)
310 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
311 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
312 wake_up(&drbd_pp_wait);
313}
314
315/*
316You need to hold the req_lock:
317 _drbd_wait_ee_list_empty()
318
319You must not have the req_lock:
3967deb1 320 drbd_free_peer_req()
0db55363 321 drbd_alloc_peer_req()
7721f567 322 drbd_free_peer_reqs()
b411b363 323 drbd_ee_fix_bhs()
a990be46 324 drbd_finish_peer_reqs()
325 drbd_clear_done_ee()
326 drbd_wait_ee_list_empty()
327*/
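/* For example (sketch): the _drbd_wait_ee_list_empty() variant must be called
 * with the lock already held,
 *	spin_lock_irq(&mdev->tconn->req_lock);
 *	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
 *	spin_unlock_irq(&mdev->tconn->req_lock);
 * which is exactly what the lock-taking wrapper drbd_wait_ee_list_empty()
 * further down does.
 */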
328
f6ffca9f 329struct drbd_peer_request *
330drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
331 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
b411b363 332{
db830c46 333 struct drbd_peer_request *peer_req;
b411b363 334 struct page *page;
45bb912b 335 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
b411b363 336
0cf9d27e 337 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
338 return NULL;
339
340 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
341 if (!peer_req) {
b411b363 342 if (!(gfp_mask & __GFP_NOWARN))
0db55363 343 dev_err(DEV, "%s: allocation failed\n", __func__);
344 return NULL;
345 }
346
c37c8ecf 347 page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
348 if (!page)
349 goto fail;
b411b363 350
351 drbd_clear_interval(&peer_req->i);
352 peer_req->i.size = data_size;
353 peer_req->i.sector = sector;
354 peer_req->i.local = false;
355 peer_req->i.waiting = false;
356
357 peer_req->epoch = NULL;
a21e9298 358 peer_req->w.mdev = mdev;
359 peer_req->pages = page;
360 atomic_set(&peer_req->pending_bios, 0);
361 peer_req->flags = 0;
362 /*
363 * The block_id is opaque to the receiver. It is not endianness
364 * converted, and sent back to the sender unchanged.
365 */
db830c46 366 peer_req->block_id = id;
b411b363 367
db830c46 368 return peer_req;
b411b363 369
45bb912b 370 fail:
db830c46 371 mempool_free(peer_req, drbd_ee_mempool);
372 return NULL;
373}
374
3967deb1 375void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
f6ffca9f 376 int is_net)
b411b363 377{
378 if (peer_req->flags & EE_HAS_DIGEST)
379 kfree(peer_req->digest);
5cc287e0 380 drbd_free_pages(mdev, peer_req->pages, is_net);
381 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
382 D_ASSERT(drbd_interval_empty(&peer_req->i));
383 mempool_free(peer_req, drbd_ee_mempool);
384}
385
7721f567 386int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
387{
388 LIST_HEAD(work_list);
db830c46 389 struct drbd_peer_request *peer_req, *t;
b411b363 390 int count = 0;
435f0740 391 int is_net = list == &mdev->net_ee;
b411b363 392
87eeee41 393 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 394 list_splice_init(list, &work_list);
87eeee41 395 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 396
db830c46 397 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
3967deb1 398 __drbd_free_peer_req(mdev, peer_req, is_net);
399 count++;
400 }
401 return count;
402}
403
404/*
405 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
b411b363 406 */
a990be46 407static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
408{
409 LIST_HEAD(work_list);
410 LIST_HEAD(reclaimed);
db830c46 411 struct drbd_peer_request *peer_req, *t;
e2b3032b 412 int err = 0;
b411b363 413
87eeee41 414 spin_lock_irq(&mdev->tconn->req_lock);
a990be46 415 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
b411b363 416 list_splice_init(&mdev->done_ee, &work_list);
87eeee41 417 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 418
db830c46 419 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
3967deb1 420 drbd_free_net_peer_req(mdev, peer_req);
421
422 /* possible callbacks here:
7be8da07 423 * e_end_block, and e_end_resync_block, e_send_discard_write.
424 * all ignore the last argument.
425 */
db830c46 426 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
427 int err2;
428
b411b363 429 /* list_del not necessary, next/prev members not touched */
430 err2 = peer_req->w.cb(&peer_req->w, !!err);
431 if (!err)
432 err = err2;
3967deb1 433 drbd_free_peer_req(mdev, peer_req);
434 }
435 wake_up(&mdev->ee_wait);
436
e2b3032b 437 return err;
438}
439
440static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
441 struct list_head *head)
442{
443 DEFINE_WAIT(wait);
444
445 /* avoids spin_lock/unlock
446 * and calling prepare_to_wait in the fast path */
447 while (!list_empty(head)) {
448 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
87eeee41 449 spin_unlock_irq(&mdev->tconn->req_lock);
7eaceacc 450 io_schedule();
b411b363 451 finish_wait(&mdev->ee_wait, &wait);
87eeee41 452 spin_lock_irq(&mdev->tconn->req_lock);
453 }
454}
455
456static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
457 struct list_head *head)
b411b363 458{
87eeee41 459 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 460 _drbd_wait_ee_list_empty(mdev, head);
87eeee41 461 spin_unlock_irq(&mdev->tconn->req_lock);
462}
463
 464/* see also kernel_accept, which is only present since 2.6.18.
465 * also we want to log which part of it failed, exactly */
7653620d 466static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
467{
468 struct sock *sk = sock->sk;
469 int err = 0;
470
471 *what = "listen";
472 err = sock->ops->listen(sock, 5);
473 if (err < 0)
474 goto out;
475
476 *what = "sock_create_lite";
477 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
478 newsock);
479 if (err < 0)
480 goto out;
481
482 *what = "accept";
483 err = sock->ops->accept(sock, *newsock, 0);
484 if (err < 0) {
485 sock_release(*newsock);
486 *newsock = NULL;
487 goto out;
488 }
489 (*newsock)->ops = sock->ops;
dd9b3604 490 __module_get((*newsock)->ops->owner);
491
492out:
493 return err;
494}
495
dbd9eea0 496static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
497{
498 mm_segment_t oldfs;
499 struct kvec iov = {
500 .iov_base = buf,
501 .iov_len = size,
502 };
503 struct msghdr msg = {
504 .msg_iovlen = 1,
505 .msg_iov = (struct iovec *)&iov,
506 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
507 };
508 int rv;
509
510 oldfs = get_fs();
511 set_fs(KERNEL_DS);
512 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
513 set_fs(oldfs);
514
515 return rv;
516}
517
de0ff338 518static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
519{
520 mm_segment_t oldfs;
521 struct kvec iov = {
522 .iov_base = buf,
523 .iov_len = size,
524 };
525 struct msghdr msg = {
526 .msg_iovlen = 1,
527 .msg_iov = (struct iovec *)&iov,
528 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
529 };
530 int rv;
531
532 oldfs = get_fs();
533 set_fs(KERNEL_DS);
534
535 for (;;) {
de0ff338 536 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
537 if (rv == size)
538 break;
539
540 /* Note:
541 * ECONNRESET other side closed the connection
542 * ERESTARTSYS (on sock) we got a signal
543 */
544
545 if (rv < 0) {
546 if (rv == -ECONNRESET)
de0ff338 547 conn_info(tconn, "sock was reset by peer\n");
b411b363 548 else if (rv != -ERESTARTSYS)
de0ff338 549 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
550 break;
551 } else if (rv == 0) {
de0ff338 552 conn_info(tconn, "sock was shut down by peer\n");
553 break;
554 } else {
555 /* signal came in, or peer/link went down,
556 * after we read a partial message
557 */
558 /* D_ASSERT(signal_pending(current)); */
559 break;
560 }
561 };
562
563 set_fs(oldfs);
564
565 if (rv != size)
bbeb641c 566 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
567
568 return rv;
569}
570
571static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
572{
573 int err;
574
575 err = drbd_recv(tconn, buf, size);
576 if (err != size) {
577 if (err >= 0)
578 err = -EIO;
579 } else
580 err = 0;
581 return err;
582}
583
584static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
585{
586 int err;
587
588 err = drbd_recv_all(tconn, buf, size);
589 if (err && !signal_pending(current))
590 conn_warn(tconn, "short read (expected size %d)\n", (int)size);
591 return err;
592}
593
594/* quoting tcp(7):
595 * On individual connections, the socket buffer size must be set prior to the
596 * listen(2) or connect(2) calls in order to have it take effect.
597 * This is our wrapper to do so.
598 */
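/* Typical call site (see drbd_try_connect() and drbd_wait_for_connect() below):
 *	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 * with both sizes taken from net_conf; passing 0 leaves the kernel default.
 */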
599static void drbd_setbufsize(struct socket *sock, unsigned int snd,
600 unsigned int rcv)
601{
602 /* open coded SO_SNDBUF, SO_RCVBUF */
603 if (snd) {
604 sock->sk->sk_sndbuf = snd;
605 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
606 }
607 if (rcv) {
608 sock->sk->sk_rcvbuf = rcv;
609 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
610 }
611}
612
eac3e990 613static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
614{
615 const char *what;
616 struct socket *sock;
617 struct sockaddr_in6 src_in6;
618 struct sockaddr_in6 peer_in6;
619 struct net_conf *nc;
620 int err, peer_addr_len, my_addr_len;
69ef82de 621 int sndbuf_size, rcvbuf_size, connect_int;
622 int disconnect_on_error = 1;
623
624 rcu_read_lock();
625 nc = rcu_dereference(tconn->net_conf);
626 if (!nc) {
627 rcu_read_unlock();
b411b363 628 return NULL;
44ed167d 629 }
630 sndbuf_size = nc->sndbuf_size;
631 rcvbuf_size = nc->rcvbuf_size;
69ef82de 632 connect_int = nc->connect_int;
089c075d 633 rcu_read_unlock();
44ed167d 634
635 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
636 memcpy(&src_in6, &tconn->my_addr, my_addr_len);
44ed167d 637
089c075d 638 if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
639 src_in6.sin6_port = 0;
640 else
641 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
642
643 peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
644 memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
645
646 what = "sock_create_kern";
647 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
648 SOCK_STREAM, IPPROTO_TCP, &sock);
649 if (err < 0) {
650 sock = NULL;
651 goto out;
652 }
653
654 sock->sk->sk_rcvtimeo =
69ef82de 655 sock->sk->sk_sndtimeo = connect_int * HZ;
44ed167d 656 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
657
658 /* explicitly bind to the configured IP as source IP
659 * for the outgoing connections.
660 * This is needed for multihomed hosts and to be
661 * able to use lo: interfaces for drbd.
662 * Make sure to use 0 as port number, so linux selects
663 * a free one dynamically.
664 */
b411b363 665 what = "bind before connect";
44ed167d 666 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
667 if (err < 0)
668 goto out;
669
670 /* connect may fail, peer not yet available.
671 * stay C_WF_CONNECTION, don't go Disconnecting! */
672 disconnect_on_error = 0;
673 what = "connect";
44ed167d 674 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
675
676out:
677 if (err < 0) {
678 if (sock) {
679 sock_release(sock);
680 sock = NULL;
681 }
682 switch (-err) {
683 /* timeout, busy, signal pending */
684 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
685 case EINTR: case ERESTARTSYS:
686 /* peer not (yet) available, network problem */
687 case ECONNREFUSED: case ENETUNREACH:
688 case EHOSTDOWN: case EHOSTUNREACH:
689 disconnect_on_error = 0;
690 break;
691 default:
eac3e990 692 conn_err(tconn, "%s failed, err = %d\n", what, err);
693 }
694 if (disconnect_on_error)
bbeb641c 695 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 696 }
44ed167d 697
698 return sock;
699}
700
7653620d 701static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
b411b363 702{
44ed167d 703 int timeo, err, my_addr_len;
69ef82de 704 int sndbuf_size, rcvbuf_size, connect_int;
b411b363 705 struct socket *s_estab = NULL, *s_listen;
706 struct sockaddr_in6 my_addr;
707 struct net_conf *nc;
708 const char *what;
709
710 rcu_read_lock();
711 nc = rcu_dereference(tconn->net_conf);
712 if (!nc) {
713 rcu_read_unlock();
b411b363 714 return NULL;
44ed167d 715 }
716 sndbuf_size = nc->sndbuf_size;
717 rcvbuf_size = nc->rcvbuf_size;
69ef82de 718 connect_int = nc->connect_int;
44ed167d 719 rcu_read_unlock();
b411b363 720
721 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
722 memcpy(&my_addr, &tconn->my_addr, my_addr_len);
723
b411b363 724 what = "sock_create_kern";
44ed167d 725 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
726 SOCK_STREAM, IPPROTO_TCP, &s_listen);
727 if (err) {
728 s_listen = NULL;
729 goto out;
730 }
731
69ef82de 732 timeo = connect_int * HZ;
733 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
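	/* Worked example (illustrative, assuming connect_int = 10 and HZ = 250):
	 * timeo starts as 2500 jiffies; timeo / 7 is ~357 jiffies (~14.3%),
	 * randomly added or subtracted, so the two possible listen timeouts are
	 * ~28.5% of the base value apart -- the "28.5% random jitter" above. */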
734
735 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
736 s_listen->sk->sk_rcvtimeo = timeo;
737 s_listen->sk->sk_sndtimeo = timeo;
44ed167d 738 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
739
740 what = "bind before listen";
44ed167d 741 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
742 if (err < 0)
743 goto out;
744
7653620d 745 err = drbd_accept(&what, s_listen, &s_estab);
746
747out:
748 if (s_listen)
749 sock_release(s_listen);
750 if (err < 0) {
751 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7653620d 752 conn_err(tconn, "%s failed, err = %d\n", what, err);
bbeb641c 753 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
754 }
755 }
756
757 return s_estab;
758}
759
e658983a 760static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
b411b363 761
762static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
763 enum drbd_packet cmd)
764{
765 if (!conn_prepare_command(tconn, sock))
766 return -EIO;
e658983a 767 return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
768}
769
9f5bdc33 770static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
b411b363 771{
772 unsigned int header_size = drbd_header_size(tconn);
773 struct packet_info pi;
774 int err;
b411b363 775
776 err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
777 if (err != header_size) {
778 if (err >= 0)
779 err = -EIO;
780 return err;
781 }
782 err = decode_header(tconn, tconn->data.rbuf, &pi);
783 if (err)
784 return err;
785 return pi.cmd;
786}
787
788/**
789 * drbd_socket_okay() - Free the socket if its connection is not okay
790 * @sock: pointer to the pointer to the socket.
791 */
dbd9eea0 792static int drbd_socket_okay(struct socket **sock)
793{
794 int rr;
795 char tb[4];
796
797 if (!*sock)
81e84650 798 return false;
b411b363 799
dbd9eea0 800 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
801
802 if (rr > 0 || rr == -EAGAIN) {
81e84650 803 return true;
804 } else {
805 sock_release(*sock);
806 *sock = NULL;
81e84650 807 return false;
808 }
809}
810/* Gets called if a connection is established, or if a new minor gets created
811 in a connection */
c141ebda 812int drbd_connected(struct drbd_conf *mdev)
907599e0 813{
0829f5ed 814 int err;
815
816 atomic_set(&mdev->packet_seq, 0);
817 mdev->peer_seq = 0;
818
819 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
820 &mdev->tconn->cstate_mutex :
821 &mdev->own_state_mutex;
822
823 err = drbd_send_sync_param(mdev);
824 if (!err)
825 err = drbd_send_sizes(mdev, 0, 0);
826 if (!err)
827 err = drbd_send_uuids(mdev);
828 if (!err)
43de7c85 829 err = drbd_send_current_state(mdev);
830 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
831 clear_bit(RESIZE_PENDING, &mdev->flags);
8b924f1d 832 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
0829f5ed 833 return err;
834}
835
836/*
837 * return values:
838 * 1 yes, we have a valid connection
839 * 0 oops, did not work out, please try again
840 * -1 peer talks different language,
841 * no point in trying again, please go standalone.
842 * -2 We do not have a network config...
843 */
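/* A caller is expected to dispatch on these values, roughly like this
 * (sketch only, not the actual receiver-thread loop):
 *
 *	int h;
 *	do {
 *		h = conn_connect(tconn);
 *		if (h == -1)
 *			conn_request_state(tconn, NS(conn, C_STANDALONE), CS_HARD);
 *	} while (h == 0);
 */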
81fa2e67 844static int conn_connect(struct drbd_tconn *tconn)
b411b363 845{
7da35862 846 struct drbd_socket sock, msock;
c141ebda 847 struct drbd_conf *mdev;
44ed167d 848 struct net_conf *nc;
c141ebda 849 int vnr, timeout, try, h, ok;
08b165ba 850 bool discard_my_data;
b411b363 851
bbeb641c 852 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
853 return -2;
854
855 mutex_init(&sock.mutex);
856 sock.sbuf = tconn->data.sbuf;
857 sock.rbuf = tconn->data.rbuf;
858 sock.socket = NULL;
859 mutex_init(&msock.mutex);
860 msock.sbuf = tconn->meta.sbuf;
861 msock.rbuf = tconn->meta.rbuf;
862 msock.socket = NULL;
863
907599e0 864 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
865
866 /* Assume that the peer only understands protocol 80 until we know better. */
867 tconn->agreed_pro_version = 80;
b411b363 868
b411b363 869 do {
870 struct socket *s;
871
872 for (try = 0;;) {
873 /* 3 tries, this should take less than a second! */
907599e0 874 s = drbd_try_connect(tconn);
875 if (s || ++try >= 3)
876 break;
877 /* give the other side time to call bind() & listen() */
20ee6390 878 schedule_timeout_interruptible(HZ / 10);
879 }
880
881 if (s) {
882 if (!sock.socket) {
883 sock.socket = s;
884 send_first_packet(tconn, &sock, P_INITIAL_DATA);
885 } else if (!msock.socket) {
886 msock.socket = s;
887 send_first_packet(tconn, &msock, P_INITIAL_META);
b411b363 888 } else {
81fa2e67 889 conn_err(tconn, "Logic error in conn_connect()\n");
890 goto out_release_sockets;
891 }
892 }
893
894 if (sock.socket && msock.socket) {
895 rcu_read_lock();
896 nc = rcu_dereference(tconn->net_conf);
897 timeout = nc->ping_timeo * HZ / 10;
898 rcu_read_unlock();
899 schedule_timeout_interruptible(timeout);
900 ok = drbd_socket_okay(&sock.socket);
901 ok = drbd_socket_okay(&msock.socket) && ok;
902 if (ok)
903 break;
904 }
905
906retry:
907599e0 907 s = drbd_wait_for_connect(tconn);
b411b363 908 if (s) {
9f5bdc33 909 try = receive_first_packet(tconn, s);
910 drbd_socket_okay(&sock.socket);
911 drbd_socket_okay(&msock.socket);
b411b363 912 switch (try) {
e5d6f33a 913 case P_INITIAL_DATA:
7da35862 914 if (sock.socket) {
907599e0 915 conn_warn(tconn, "initial packet S crossed\n");
7da35862 916 sock_release(sock.socket);
b411b363 917 }
7da35862 918 sock.socket = s;
b411b363 919 break;
e5d6f33a 920 case P_INITIAL_META:
7da35862 921 if (msock.socket) {
907599e0 922 conn_warn(tconn, "initial packet M crossed\n");
7da35862 923 sock_release(msock.socket);
b411b363 924 }
7da35862 925 msock.socket = s;
907599e0 926 set_bit(DISCARD_CONCURRENT, &tconn->flags);
927 break;
928 default:
907599e0 929 conn_warn(tconn, "Error receiving initial packet\n");
930 sock_release(s);
931 if (random32() & 1)
932 goto retry;
933 }
934 }
935
bbeb641c 936 if (tconn->cstate <= C_DISCONNECTING)
937 goto out_release_sockets;
938 if (signal_pending(current)) {
939 flush_signals(current);
940 smp_rmb();
907599e0 941 if (get_t_state(&tconn->receiver) == EXITING)
942 goto out_release_sockets;
943 }
944
 945 if (sock.socket && msock.socket) {
946 ok = drbd_socket_okay(&sock.socket);
947 ok = drbd_socket_okay(&msock.socket) && ok;
b411b363
PR
948 if (ok)
949 break;
950 }
951 } while (1);
952
7da35862
PR
953 sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
954 msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
b411b363 955
7da35862
PR
956 sock.socket->sk->sk_allocation = GFP_NOIO;
957 msock.socket->sk->sk_allocation = GFP_NOIO;
b411b363 958
7da35862
PR
959 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
960 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
b411b363 961
b411b363 962 /* NOT YET ...
7da35862
PR
963 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
964 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
6038178e 965 * first set it to the P_CONNECTION_FEATURES timeout,
b411b363 966 * which we set to 4x the configured ping_timeout. */
44ed167d
PR
967 rcu_read_lock();
968 nc = rcu_dereference(tconn->net_conf);
969
7da35862
PR
970 sock.socket->sk->sk_sndtimeo =
971 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
44ed167d 972
7da35862 973 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
44ed167d 974 timeout = nc->timeout * HZ / 10;
08b165ba 975 discard_my_data = nc->discard_my_data;
44ed167d 976 rcu_read_unlock();
b411b363 977
7da35862 978 msock.socket->sk->sk_sndtimeo = timeout;
b411b363
PR
979
980 /* we don't want delays.
25985edc 981 * we use TCP_CORK where appropriate, though */
7da35862
PR
982 drbd_tcp_nodelay(sock.socket);
983 drbd_tcp_nodelay(msock.socket);
b411b363 984
7da35862
PR
985 tconn->data.socket = sock.socket;
986 tconn->meta.socket = msock.socket;
907599e0 987 tconn->last_received = jiffies;
b411b363 988
6038178e 989 h = drbd_do_features(tconn);
b411b363
PR
990 if (h <= 0)
991 return h;
992
907599e0 993 if (tconn->cram_hmac_tfm) {
b411b363 994 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
907599e0 995 switch (drbd_do_auth(tconn)) {
b10d96cb 996 case -1:
907599e0 997 conn_err(tconn, "Authentication of peer failed\n");
b411b363 998 return -1;
b10d96cb 999 case 0:
907599e0 1000 conn_err(tconn, "Authentication of peer failed, trying again.\n");
b10d96cb 1001 return 0;
b411b363
PR
1002 }
1003 }
1004
7da35862
PR
1005 tconn->data.socket->sk->sk_sndtimeo = timeout;
1006 tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
b411b363 1007
387eb308 1008 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
7e2455c1 1009 return -1;
b411b363 1010
c141ebda
PR
1011 rcu_read_lock();
1012 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1013 kref_get(&mdev->kref);
1014 rcu_read_unlock();
08b165ba
PR
1015
1016 if (discard_my_data)
1017 set_bit(DISCARD_MY_DATA, &mdev->flags);
1018 else
1019 clear_bit(DISCARD_MY_DATA, &mdev->flags);
1020
c141ebda
PR
1021 drbd_connected(mdev);
1022 kref_put(&mdev->kref, &drbd_minor_destroy);
1023 rcu_read_lock();
1024 }
1025 rcu_read_unlock();
1026
823bd832
PR
1027 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
1028 return 0;
1029
1030 drbd_thread_start(&tconn->asender);
1031
08b165ba
PR
1032 mutex_lock(&tconn->conf_update);
1033 /* The discard_my_data flag is a single-shot modifier to the next
1034 * connection attempt, the handshake of which is now well underway.
1035 * No need for rcu style copying of the whole struct
1036 * just to clear a single value. */
1037 tconn->net_conf->discard_my_data = 0;
1038 mutex_unlock(&tconn->conf_update);
1039
d3fcb490 1040 return h;
b411b363
PR
1041
1042out_release_sockets:
7da35862
PR
1043 if (sock.socket)
1044 sock_release(sock.socket);
1045 if (msock.socket)
1046 sock_release(msock.socket);
b411b363
PR
1047 return -1;
1048}
1049
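/* Header variants handled by decode_header() below (summary; fields as read
 * by that function, exact struct layouts live elsewhere in the DRBD headers):
 *	p_header80:  32-bit magic DRBD_MAGIC,     16-bit command, 16-bit length
 *	p_header95:  16-bit magic DRBD_MAGIC_BIG, 16-bit command, 32-bit length
 *	p_header100: 32-bit magic DRBD_MAGIC_100, 16-bit volume, 16-bit command,
 *	             32-bit length, and a pad field that must be zero
 */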
e658983a 1050static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
b411b363 1051{
e658983a
AG
1052 unsigned int header_size = drbd_header_size(tconn);
1053
0c8e36d9
AG
1054 if (header_size == sizeof(struct p_header100) &&
1055 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1056 struct p_header100 *h = header;
1057 if (h->pad != 0) {
1058 conn_err(tconn, "Header padding is not zero\n");
1059 return -EINVAL;
1060 }
1061 pi->vnr = be16_to_cpu(h->volume);
1062 pi->cmd = be16_to_cpu(h->command);
1063 pi->size = be32_to_cpu(h->length);
1064 } else if (header_size == sizeof(struct p_header95) &&
1065 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
e658983a 1066 struct p_header95 *h = header;
e658983a 1067 pi->cmd = be16_to_cpu(h->command);
b55d84ba
AG
1068 pi->size = be32_to_cpu(h->length);
1069 pi->vnr = 0;
e658983a
AG
1070 } else if (header_size == sizeof(struct p_header80) &&
1071 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1072 struct p_header80 *h = header;
1073 pi->cmd = be16_to_cpu(h->command);
1074 pi->size = be16_to_cpu(h->length);
77351055 1075 pi->vnr = 0;
02918be2 1076 } else {
e658983a
AG
1077 conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
1078 be32_to_cpu(*(__be32 *)header),
1079 tconn->agreed_pro_version);
8172f3e9 1080 return -EINVAL;
b411b363 1081 }
e658983a 1082 pi->data = header + header_size;
8172f3e9 1083 return 0;
257d0af6
PR
1084}
1085
9ba7aa00 1086static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
257d0af6 1087{
e658983a 1088 void *buffer = tconn->data.rbuf;
69bc7bc3 1089 int err;
257d0af6 1090
e658983a 1091 err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
a5c31904 1092 if (err)
69bc7bc3 1093 return err;
257d0af6 1094
e658983a 1095 err = decode_header(tconn, buffer, pi);
9ba7aa00 1096 tconn->last_received = jiffies;
b411b363 1097
69bc7bc3 1098 return err;
b411b363
PR
1099}
1100
4b0007c0 1101static void drbd_flush(struct drbd_tconn *tconn)
b411b363
PR
1102{
1103 int rv;
4b0007c0
PR
1104 struct drbd_conf *mdev;
1105 int vnr;
1106
1107 if (tconn->write_ordering >= WO_bdev_flush) {
615e087f 1108 rcu_read_lock();
4b0007c0 1109 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
615e087f
LE
1110 if (!get_ldev(mdev))
1111 continue;
1112 kref_get(&mdev->kref);
1113 rcu_read_unlock();
1114
1115 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1116 GFP_NOIO, NULL);
1117 if (rv) {
1118 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1119 /* would rather check on EOPNOTSUPP, but that is not reliable.
1120 * don't try again for ANY return value != 0
1121 * if (rv == -EOPNOTSUPP) */
1122 drbd_bump_write_ordering(tconn, WO_drain_io);
4b0007c0 1123 }
615e087f
LE
1124 put_ldev(mdev);
1125 kref_put(&mdev->kref, &drbd_minor_destroy);
1126
1127 rcu_read_lock();
1128 if (rv)
1129 break;
b411b363 1130 }
615e087f 1131 rcu_read_unlock();
b411b363 1132 }
b411b363
PR
1133}
1134
1135/**
1136 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1137 * @mdev: DRBD device.
1138 * @epoch: Epoch object.
1139 * @ev: Epoch event.
1140 */
1e9dd291 1141static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
b411b363
PR
1142 struct drbd_epoch *epoch,
1143 enum epoch_event ev)
1144{
2451fc3b 1145 int epoch_size;
b411b363 1146 struct drbd_epoch *next_epoch;
b411b363
PR
1147 enum finish_epoch rv = FE_STILL_LIVE;
1148
12038a3a 1149 spin_lock(&tconn->epoch_lock);
b411b363
PR
1150 do {
1151 next_epoch = NULL;
b411b363
PR
1152
1153 epoch_size = atomic_read(&epoch->epoch_size);
1154
1155 switch (ev & ~EV_CLEANUP) {
1156 case EV_PUT:
1157 atomic_dec(&epoch->active);
1158 break;
1159 case EV_GOT_BARRIER_NR:
1160 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
b411b363
PR
1161 break;
1162 case EV_BECAME_LAST:
1163 /* nothing to do*/
1164 break;
1165 }
1166
b411b363
PR
1167 if (epoch_size != 0 &&
1168 atomic_read(&epoch->active) == 0 &&
85d73513 1169 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
b411b363 1170 if (!(ev & EV_CLEANUP)) {
12038a3a 1171 spin_unlock(&tconn->epoch_lock);
1d2783d5 1172 drbd_send_b_ack(epoch->mdev, epoch->barrier_nr, epoch_size);
12038a3a 1173 spin_lock(&tconn->epoch_lock);
b411b363 1174 }
85d73513 1175 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
1d2783d5 1176 dec_unacked(epoch->mdev);
b411b363 1177
12038a3a 1178 if (tconn->current_epoch != epoch) {
b411b363
PR
1179 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1180 list_del(&epoch->list);
1181 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
12038a3a 1182 tconn->epochs--;
b411b363
PR
1183 kfree(epoch);
1184
1185 if (rv == FE_STILL_LIVE)
1186 rv = FE_DESTROYED;
1187 } else {
1188 epoch->flags = 0;
1189 atomic_set(&epoch->epoch_size, 0);
698f9315 1190 /* atomic_set(&epoch->active, 0); is already zero */
b411b363
PR
1191 if (rv == FE_STILL_LIVE)
1192 rv = FE_RECYCLED;
1193 }
1194 }
1195
1196 if (!next_epoch)
1197 break;
1198
1199 epoch = next_epoch;
1200 } while (1);
1201
12038a3a 1202 spin_unlock(&tconn->epoch_lock);
b411b363 1203
b411b363
PR
1204 return rv;
1205}
1206
1207/**
1208 * drbd_bump_write_ordering() - Fall back to an other write ordering method
4b0007c0 1209 * @tconn: DRBD connection.
b411b363
PR
1210 * @wo: Write ordering method to try.
1211 */
4b0007c0 1212void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
b411b363 1213{
daeda1cc 1214 struct disk_conf *dc;
4b0007c0 1215 struct drbd_conf *mdev;
b411b363 1216 enum write_ordering_e pwo;
4b0007c0 1217 int vnr;
b411b363
PR
1218 static char *write_ordering_str[] = {
1219 [WO_none] = "none",
1220 [WO_drain_io] = "drain",
1221 [WO_bdev_flush] = "flush",
b411b363
PR
1222 };
1223
4b0007c0 1224 pwo = tconn->write_ordering;
b411b363 1225 wo = min(pwo, wo);
daeda1cc 1226 rcu_read_lock();
4b0007c0
PR
1227 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1228 if (!get_ldev(mdev))
1229 continue;
1230 dc = rcu_dereference(mdev->ldev->disk_conf);
1231
1232 if (wo == WO_bdev_flush && !dc->disk_flushes)
1233 wo = WO_drain_io;
1234 if (wo == WO_drain_io && !dc->disk_drain)
1235 wo = WO_none;
1236 put_ldev(mdev);
1237 }
daeda1cc 1238 rcu_read_unlock();
4b0007c0
PR
1239 tconn->write_ordering = wo;
1240 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1241 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
b411b363
PR
1242}
1243
45bb912b 1244/**
fbe29dec 1245 * drbd_submit_peer_request()
45bb912b 1246 * @mdev: DRBD device.
db830c46 1247 * @peer_req: peer request
45bb912b 1248 * @rw: flag field, see bio->bi_rw
10f6d992
LE
1249 *
1250 * May spread the pages to multiple bios,
1251 * depending on bio_add_page restrictions.
1252 *
1253 * Returns 0 if all bios have been submitted,
1254 * -ENOMEM if we could not allocate enough bios,
1255 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1256 * single page to an empty bio (which should never happen and likely indicates
1257 * that the lower level IO stack is in some way broken). This has been observed
1258 * on certain Xen deployments.
45bb912b
LE
1259 */
1260/* TODO allocate from our own bio_set. */
fbe29dec
AG
1261int drbd_submit_peer_request(struct drbd_conf *mdev,
1262 struct drbd_peer_request *peer_req,
1263 const unsigned rw, const int fault_type)
45bb912b
LE
1264{
1265 struct bio *bios = NULL;
1266 struct bio *bio;
db830c46
AG
1267 struct page *page = peer_req->pages;
1268 sector_t sector = peer_req->i.sector;
1269 unsigned ds = peer_req->i.size;
45bb912b
LE
1270 unsigned n_bios = 0;
1271 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
10f6d992 1272 int err = -ENOMEM;
45bb912b
LE
1273
1274 /* In most cases, we will only need one bio. But in case the lower
1275 * level restrictions happen to be different at this offset on this
1276 * side than those of the sending peer, we may need to submit the
da4a75d2
LE
1277 * request in more than one bio.
1278 *
1279 * Plain bio_alloc is good enough here, this is no DRBD internally
1280 * generated bio, but a bio allocated on behalf of the peer.
1281 */
45bb912b
LE
1282next_bio:
1283 bio = bio_alloc(GFP_NOIO, nr_pages);
1284 if (!bio) {
1285 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1286 goto fail;
1287 }
db830c46 1288 /* > peer_req->i.sector, unless this is the first bio */
45bb912b
LE
1289 bio->bi_sector = sector;
1290 bio->bi_bdev = mdev->ldev->backing_bdev;
45bb912b 1291 bio->bi_rw = rw;
db830c46 1292 bio->bi_private = peer_req;
fcefa62e 1293 bio->bi_end_io = drbd_peer_request_endio;
45bb912b
LE
1294
1295 bio->bi_next = bios;
1296 bios = bio;
1297 ++n_bios;
1298
1299 page_chain_for_each(page) {
1300 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1301 if (!bio_add_page(bio, page, len, 0)) {
10f6d992
LE
1302 /* A single page must always be possible!
1303 * But in case it fails anyways,
1304 * we deal with it, and complain (below). */
1305 if (bio->bi_vcnt == 0) {
1306 dev_err(DEV,
1307 "bio_add_page failed for len=%u, "
1308 "bi_vcnt=0 (bi_sector=%llu)\n",
1309 len, (unsigned long long)bio->bi_sector);
1310 err = -ENOSPC;
1311 goto fail;
1312 }
45bb912b
LE
1313 goto next_bio;
1314 }
1315 ds -= len;
1316 sector += len >> 9;
1317 --nr_pages;
1318 }
1319 D_ASSERT(page == NULL);
1320 D_ASSERT(ds == 0);
1321
db830c46 1322 atomic_set(&peer_req->pending_bios, n_bios);
45bb912b
LE
1323 do {
1324 bio = bios;
1325 bios = bios->bi_next;
1326 bio->bi_next = NULL;
1327
45bb912b 1328 drbd_generic_make_request(mdev, fault_type, bio);
45bb912b 1329 } while (bios);
45bb912b
LE
1330 return 0;
1331
1332fail:
1333 while (bios) {
1334 bio = bios;
1335 bios = bios->bi_next;
1336 bio_put(bio);
1337 }
10f6d992 1338 return err;
45bb912b
LE
1339}
1340
53840641 1341static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
db830c46 1342 struct drbd_peer_request *peer_req)
53840641 1343{
db830c46 1344 struct drbd_interval *i = &peer_req->i;
53840641
AG
1345
1346 drbd_remove_interval(&mdev->write_requests, i);
1347 drbd_clear_interval(i);
1348
6c852bec 1349 /* Wake up any processes waiting for this peer request to complete. */
53840641
AG
1350 if (i->waiting)
1351 wake_up(&mdev->misc_wait);
1352}
1353
77fede51
PR
1354void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1355{
1356 struct drbd_conf *mdev;
1357 int vnr;
1358
1359 rcu_read_lock();
1360 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1361 kref_get(&mdev->kref);
1362 rcu_read_unlock();
1363 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1364 kref_put(&mdev->kref, &drbd_minor_destroy);
1365 rcu_read_lock();
1366 }
1367 rcu_read_unlock();
1368}
1369
4a76b161 1370static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 1371{
4a76b161 1372 struct drbd_conf *mdev;
2451fc3b 1373 int rv;
e658983a 1374 struct p_barrier *p = pi->data;
b411b363
PR
1375 struct drbd_epoch *epoch;
1376
4a76b161
AG
1377 mdev = vnr_to_mdev(tconn, pi->vnr);
1378 if (!mdev)
1379 return -EIO;
1380
b411b363
PR
1381 inc_unacked(mdev);
1382
12038a3a
PR
1383 tconn->current_epoch->barrier_nr = p->barrier;
1384 tconn->current_epoch->mdev = mdev;
1e9dd291 1385 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
b411b363
PR
1386
1387 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1388 * the activity log, which means it would not be resynced in case the
1389 * R_PRIMARY crashes now.
1390 * Therefore we must send the barrier_ack after the barrier request was
1391 * completed. */
4b0007c0 1392 switch (tconn->write_ordering) {
b411b363
PR
1393 case WO_none:
1394 if (rv == FE_RECYCLED)
82bc0194 1395 return 0;
2451fc3b
PR
1396
1397 /* receiver context, in the writeout path of the other node.
1398 * avoid potential distributed deadlock */
1399 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1400 if (epoch)
1401 break;
1402 else
1403 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1404 /* Fall through */
b411b363
PR
1405
1406 case WO_bdev_flush:
1407 case WO_drain_io:
77fede51 1408 conn_wait_active_ee_empty(tconn);
4b0007c0 1409 drbd_flush(tconn);
2451fc3b 1410
12038a3a 1411 if (atomic_read(&tconn->current_epoch->epoch_size)) {
2451fc3b
PR
1412 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1413 if (epoch)
1414 break;
b411b363
PR
1415 }
1416
12038a3a 1417 epoch = tconn->current_epoch;
2451fc3b
PR
1418 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1419
1420 D_ASSERT(atomic_read(&epoch->active) == 0);
1421 D_ASSERT(epoch->flags == 0);
b411b363 1422
82bc0194 1423 return 0;
2451fc3b 1424 default:
4b0007c0 1425 dev_err(DEV, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
82bc0194 1426 return -EIO;
b411b363
PR
1427 }
1428
1429 epoch->flags = 0;
1430 atomic_set(&epoch->epoch_size, 0);
1431 atomic_set(&epoch->active, 0);
1432
12038a3a
PR
1433 spin_lock(&tconn->epoch_lock);
1434 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1435 list_add(&epoch->list, &tconn->current_epoch->list);
1436 tconn->current_epoch = epoch;
1437 tconn->epochs++;
b411b363
PR
1438 } else {
1439 /* The current_epoch got recycled while we allocated this one... */
1440 kfree(epoch);
1441 }
12038a3a 1442 spin_unlock(&tconn->epoch_lock);
b411b363 1443
82bc0194 1444 return 0;
b411b363
PR
1445}
1446
1447/* used from receive_RSDataReply (recv_resync_read)
1448 * and from receive_Data */
f6ffca9f
AG
1449static struct drbd_peer_request *
1450read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1451 int data_size) __must_hold(local)
b411b363 1452{
6666032a 1453 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
db830c46 1454 struct drbd_peer_request *peer_req;
b411b363 1455 struct page *page;
a5c31904 1456 int dgs, ds, err;
a0638456
PR
1457 void *dig_in = mdev->tconn->int_dig_in;
1458 void *dig_vv = mdev->tconn->int_dig_vv;
6b4388ac 1459 unsigned long *data;
b411b363 1460
88104ca4
AG
1461 dgs = 0;
1462 if (mdev->tconn->peer_integrity_tfm) {
1463 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
9f5bdc33
AG
1464 /*
1465 * FIXME: Receive the incoming digest into the receive buffer
1466 * here, together with its struct p_data?
1467 */
a5c31904
AG
1468 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1469 if (err)
b411b363 1470 return NULL;
88104ca4 1471 data_size -= dgs;
b411b363
PR
1472 }
1473
841ce241
AG
1474 if (!expect(data_size != 0))
1475 return NULL;
1476 if (!expect(IS_ALIGNED(data_size, 512)))
1477 return NULL;
1478 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1479 return NULL;
b411b363 1480
6666032a
LE
 1481 /* even though we trust our peer,
1482 * we sometimes have to double check. */
1483 if (sector + (data_size>>9) > capacity) {
fdda6544
LE
1484 dev_err(DEV, "request from peer beyond end of local disk: "
1485 "capacity: %llus < sector: %llus + size: %u\n",
6666032a
LE
1486 (unsigned long long)capacity,
1487 (unsigned long long)sector, data_size);
1488 return NULL;
1489 }
1490
b411b363
PR
1491 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1492 * "criss-cross" setup, that might cause write-out on some other DRBD,
1493 * which in turn might block on the other node at this very place. */
0db55363 1494 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
db830c46 1495 if (!peer_req)
b411b363 1496 return NULL;
45bb912b 1497
b411b363 1498 ds = data_size;
db830c46 1499 page = peer_req->pages;
45bb912b
LE
1500 page_chain_for_each(page) {
1501 unsigned len = min_t(int, ds, PAGE_SIZE);
6b4388ac 1502 data = kmap(page);
a5c31904 1503 err = drbd_recv_all_warn(mdev->tconn, data, len);
0cf9d27e 1504 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
6b4388ac
PR
1505 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1506 data[0] = data[0] ^ (unsigned long)-1;
1507 }
b411b363 1508 kunmap(page);
a5c31904 1509 if (err) {
3967deb1 1510 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
1511 return NULL;
1512 }
a5c31904 1513 ds -= len;
b411b363
PR
1514 }
1515
1516 if (dgs) {
5b614abe 1517 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
b411b363 1518 if (memcmp(dig_in, dig_vv, dgs)) {
470be44a
LE
1519 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1520 (unsigned long long)sector, data_size);
3967deb1 1521 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
1522 return NULL;
1523 }
1524 }
1525 mdev->recv_cnt += data_size>>9;
db830c46 1526 return peer_req;
b411b363
PR
1527}
1528
1529/* drbd_drain_block() just takes a data block
1530 * out of the socket input buffer, and discards it.
1531 */
1532static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1533{
1534 struct page *page;
a5c31904 1535 int err = 0;
b411b363
PR
1536 void *data;
1537
c3470cde 1538 if (!data_size)
fc5be839 1539 return 0;
c3470cde 1540
c37c8ecf 1541 page = drbd_alloc_pages(mdev, 1, 1);
b411b363
PR
1542
1543 data = kmap(page);
1544 while (data_size) {
fc5be839
AG
1545 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1546
a5c31904
AG
1547 err = drbd_recv_all_warn(mdev->tconn, data, len);
1548 if (err)
b411b363 1549 break;
a5c31904 1550 data_size -= len;
b411b363
PR
1551 }
1552 kunmap(page);
5cc287e0 1553 drbd_free_pages(mdev, page, 0);
fc5be839 1554 return err;
b411b363
PR
1555}
1556
1557static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1558 sector_t sector, int data_size)
1559{
1560 struct bio_vec *bvec;
1561 struct bio *bio;
a5c31904 1562 int dgs, err, i, expect;
a0638456
PR
1563 void *dig_in = mdev->tconn->int_dig_in;
1564 void *dig_vv = mdev->tconn->int_dig_vv;
b411b363 1565
88104ca4
AG
1566 dgs = 0;
1567 if (mdev->tconn->peer_integrity_tfm) {
1568 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
a5c31904
AG
1569 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1570 if (err)
1571 return err;
88104ca4 1572 data_size -= dgs;
b411b363
PR
1573 }
1574
b411b363
PR
1575 /* optimistically update recv_cnt. if receiving fails below,
1576 * we disconnect anyways, and counters will be reset. */
1577 mdev->recv_cnt += data_size>>9;
1578
1579 bio = req->master_bio;
1580 D_ASSERT(sector == bio->bi_sector);
1581
1582 bio_for_each_segment(bvec, bio, i) {
a5c31904 1583 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
b411b363 1584 expect = min_t(int, data_size, bvec->bv_len);
a5c31904 1585 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
b411b363 1586 kunmap(bvec->bv_page);
a5c31904
AG
1587 if (err)
1588 return err;
1589 data_size -= expect;
b411b363
PR
1590 }
1591
1592 if (dgs) {
5b614abe 1593 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
b411b363
PR
1594 if (memcmp(dig_in, dig_vv, dgs)) {
1595 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
28284cef 1596 return -EINVAL;
b411b363
PR
1597 }
1598 }
1599
1600 D_ASSERT(data_size == 0);
28284cef 1601 return 0;
b411b363
PR
1602}
1603
a990be46
AG
1604/*
1605 * e_end_resync_block() is called in asender context via
1606 * drbd_finish_peer_reqs().
1607 */
99920dc5 1608static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1609{
8050e6d0
AG
1610 struct drbd_peer_request *peer_req =
1611 container_of(w, struct drbd_peer_request, w);
00d56944 1612 struct drbd_conf *mdev = w->mdev;
db830c46 1613 sector_t sector = peer_req->i.sector;
99920dc5 1614 int err;
b411b363 1615
db830c46 1616 D_ASSERT(drbd_interval_empty(&peer_req->i));
b411b363 1617
db830c46
AG
1618 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1619 drbd_set_in_sync(mdev, sector, peer_req->i.size);
99920dc5 1620 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
b411b363
PR
1621 } else {
1622 /* Record failure to sync */
db830c46 1623 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
b411b363 1624
99920dc5 1625 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
b411b363
PR
1626 }
1627 dec_unacked(mdev);
1628
99920dc5 1629 return err;
b411b363
PR
1630}
1631
1632static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1633{
db830c46 1634 struct drbd_peer_request *peer_req;
b411b363 1635
db830c46
AG
1636 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1637 if (!peer_req)
45bb912b 1638 goto fail;
b411b363
PR
1639
1640 dec_rs_pending(mdev);
1641
b411b363
PR
1642 inc_unacked(mdev);
1643 /* corresponding dec_unacked() in e_end_resync_block()
1644 * respective _drbd_clear_done_ee */
1645
db830c46 1646 peer_req->w.cb = e_end_resync_block;
45bb912b 1647
87eeee41 1648 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 1649 list_add(&peer_req->w.list, &mdev->sync_ee);
87eeee41 1650 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1651
0f0601f4 1652 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
fbe29dec 1653 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
e1c1b0fc 1654 return 0;
b411b363 1655
10f6d992
LE
1656 /* don't care for the reason here */
1657 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 1658 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 1659 list_del(&peer_req->w.list);
87eeee41 1660 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 1661
3967deb1 1662 drbd_free_peer_req(mdev, peer_req);
45bb912b
LE
1663fail:
1664 put_ldev(mdev);
e1c1b0fc 1665 return -EIO;
b411b363
PR
1666}
1667
668eebc6 1668static struct drbd_request *
bc9c5c41
AG
1669find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1670 sector_t sector, bool missing_ok, const char *func)
51624585 1671{
51624585
AG
1672 struct drbd_request *req;
1673
bc9c5c41
AG
1674 /* Request object according to our peer */
1675 req = (struct drbd_request *)(unsigned long)id;
5e472264 1676 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1677 return req;
c3afd8f5 1678 if (!missing_ok) {
5af172ed 1679 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
c3afd8f5
AG
1680 (unsigned long)id, (unsigned long long)sector);
1681 }
51624585
AG
1682 return NULL;
1683}
1684
4a76b161 1685static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 1686{
4a76b161 1687 struct drbd_conf *mdev;
b411b363
PR
1688 struct drbd_request *req;
1689 sector_t sector;
82bc0194 1690 int err;
e658983a 1691 struct p_data *p = pi->data;
4a76b161
AG
1692
1693 mdev = vnr_to_mdev(tconn, pi->vnr);
1694 if (!mdev)
1695 return -EIO;
b411b363
PR
1696
1697 sector = be64_to_cpu(p->sector);
1698
87eeee41 1699 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 1700 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
87eeee41 1701 spin_unlock_irq(&mdev->tconn->req_lock);
c3afd8f5 1702 if (unlikely(!req))
82bc0194 1703 return -EIO;
b411b363 1704
24c4830c 1705 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
b411b363
PR
1706 * special casing it there for the various failure cases.
1707 * still no race with drbd_fail_pending_reads */
e2857216 1708 err = recv_dless_read(mdev, req, sector, pi->size);
82bc0194 1709 if (!err)
8554df1c 1710 req_mod(req, DATA_RECEIVED);
b411b363
PR
1711 /* else: nothing. handled from drbd_disconnect...
1712 * I don't think we may complete this just yet
1713 * in case we are "on-disconnect: freeze" */
1714
82bc0194 1715 return err;
b411b363
PR
1716}
1717
4a76b161 1718static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 1719{
4a76b161 1720 struct drbd_conf *mdev;
b411b363 1721 sector_t sector;
82bc0194 1722 int err;
e658983a 1723 struct p_data *p = pi->data;
4a76b161
AG
1724
1725 mdev = vnr_to_mdev(tconn, pi->vnr);
1726 if (!mdev)
1727 return -EIO;
b411b363
PR
1728
1729 sector = be64_to_cpu(p->sector);
1730 D_ASSERT(p->block_id == ID_SYNCER);
1731
1732 if (get_ldev(mdev)) {
1733 /* data is submitted to disk within recv_resync_read.
1734 * corresponding put_ldev done below on error,
fcefa62e 1735 * or in drbd_peer_request_endio. */
e2857216 1736 err = recv_resync_read(mdev, sector, pi->size);
b411b363
PR
1737 } else {
1738 if (__ratelimit(&drbd_ratelimit_state))
1739 dev_err(DEV, "Can not write resync data to local disk.\n");
1740
e2857216 1741 err = drbd_drain_block(mdev, pi->size);
b411b363 1742
e2857216 1743 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
b411b363
PR
1744 }
1745
e2857216 1746 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
778f271d 1747
82bc0194 1748 return err;
b411b363
PR
1749}
1750
99920dc5 1751static int w_restart_write(struct drbd_work *w, int cancel)
7be8da07
AG
1752{
1753 struct drbd_request *req = container_of(w, struct drbd_request, w);
1754 struct drbd_conf *mdev = w->mdev;
1755 struct bio *bio;
1756 unsigned long start_time;
1757 unsigned long flags;
1758
1759 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1760 if (!expect(req->rq_state & RQ_POSTPONED)) {
1761 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
99920dc5 1762 return -EIO;
7be8da07
AG
1763 }
1764 bio = req->master_bio;
1765 start_time = req->start_time;
1766 /* Postponed requests will not have their master_bio completed! */
1767 __req_mod(req, DISCARD_WRITE, NULL);
1768 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1769
1770 while (__drbd_make_request(mdev, bio, start_time))
1771 /* retry */ ;
99920dc5 1772 return 0;
7be8da07
AG
1773}
1774
1775static void restart_conflicting_writes(struct drbd_conf *mdev,
1776 sector_t sector, int size)
1777{
1778 struct drbd_interval *i;
1779 struct drbd_request *req;
1780
1781 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1782 if (!i->local)
1783 continue;
1784 req = container_of(i, struct drbd_request, i);
1785 if (req->rq_state & RQ_LOCAL_PENDING ||
1786 !(req->rq_state & RQ_POSTPONED))
1787 continue;
1788 if (expect(list_empty(&req->w.list))) {
1789 req->w.mdev = mdev;
1790 req->w.cb = w_restart_write;
1791 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1792 }
1793 }
1794}
1795
a990be46
AG
1796/*
1797 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1798 */
99920dc5 1799static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1800{
8050e6d0
AG
1801 struct drbd_peer_request *peer_req =
1802 container_of(w, struct drbd_peer_request, w);
00d56944 1803 struct drbd_conf *mdev = w->mdev;
db830c46 1804 sector_t sector = peer_req->i.sector;
99920dc5 1805 int err = 0, pcmd;
b411b363 1806
303d1448 1807 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1808 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1809 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1810 mdev->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1811 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1812 P_RS_WRITE_ACK : P_WRITE_ACK;
99920dc5 1813 err = drbd_send_ack(mdev, pcmd, peer_req);
b411b363 1814 if (pcmd == P_RS_WRITE_ACK)
db830c46 1815 drbd_set_in_sync(mdev, sector, peer_req->i.size);
b411b363 1816 } else {
99920dc5 1817 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
b411b363
PR
1818 /* we expect it to be marked out of sync anyways...
1819 * maybe assert this? */
1820 }
1821 dec_unacked(mdev);
1822 }
1823 /* we delete from the conflict detection hash _after_ we sent out the
1824 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1825 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
87eeee41 1826 spin_lock_irq(&mdev->tconn->req_lock);
db830c46
AG
1827 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1828 drbd_remove_epoch_entry_interval(mdev, peer_req);
7be8da07
AG
1829 if (peer_req->flags & EE_RESTART_REQUESTS)
1830 restart_conflicting_writes(mdev, sector, peer_req->i.size);
87eeee41 1831 spin_unlock_irq(&mdev->tconn->req_lock);
bb3bfe96 1832 } else
db830c46 1833 D_ASSERT(drbd_interval_empty(&peer_req->i));
b411b363 1834
1e9dd291 1835 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1836
99920dc5 1837 return err;
b411b363
PR
1838}
1839
7be8da07 1840static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1841{
7be8da07 1842 struct drbd_conf *mdev = w->mdev;
8050e6d0
AG
1843 struct drbd_peer_request *peer_req =
1844 container_of(w, struct drbd_peer_request, w);
99920dc5 1845 int err;
b411b363 1846
99920dc5 1847 err = drbd_send_ack(mdev, ack, peer_req);
b411b363
PR
1848 dec_unacked(mdev);
1849
99920dc5 1850 return err;
b411b363
PR
1851}
1852
99920dc5 1853static int e_send_discard_write(struct drbd_work *w, int unused)
7be8da07
AG
1854{
1855 return e_send_ack(w, P_DISCARD_WRITE);
1856}
1857
99920dc5 1858static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07
AG
1859{
1860 struct drbd_tconn *tconn = w->mdev->tconn;
1861
1862 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1863 P_RETRY_WRITE : P_DISCARD_WRITE);
1864}
1865
3e394da1
AG
1866static bool seq_greater(u32 a, u32 b)
1867{
1868 /*
1869 * We assume 32-bit wrap-around here.
1870 * For 24-bit wrap-around, we would have to shift:
1871 * a <<= 8; b <<= 8;
1872 */
1873 return (s32)a - (s32)b > 0;
1874}
1875
1876static u32 seq_max(u32 a, u32 b)
1877{
1878 return seq_greater(a, b) ? a : b;
1879}
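/*
 * Illustrative sketch, not part of the driver: the signed-difference test
 * in seq_greater() treats the 32-bit sequence space as circular, so values
 * that have just wrapped past 2^32 still compare as "newer" as long as the
 * two numbers are less than 2^31 apart.  A hypothetical self-test:
 */
#if 0	/* example only, never built */
static void seq_wraparound_example(void)
{
	/* 5 is logically "after" 0xfffffffb once the counter has wrapped */
	WARN_ON(!seq_greater(0x00000005, 0xfffffffbU));
	WARN_ON(seq_greater(0xfffffffbU, 0x00000005));
	WARN_ON(seq_max(0x00000005, 0xfffffffbU) != 0x00000005);
}
#endif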
1880
7be8da07
AG
1881static bool need_peer_seq(struct drbd_conf *mdev)
1882{
1883 struct drbd_tconn *tconn = mdev->tconn;
302bdeae 1884 int tp;
7be8da07
AG
1885
1886 /*
1887 * We only need to keep track of the last packet_seq number of our peer
1888 * if we are in dual-primary mode and we have the discard flag set; see
1889 * handle_write_conflicts().
1890 */
302bdeae
PR
1891
1892 rcu_read_lock();
1893 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1894 rcu_read_unlock();
1895
1896 return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags);
7be8da07
AG
1897}
1898
43ae077d 1899static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
3e394da1 1900{
3c13b680 1901 unsigned int newest_peer_seq;
3e394da1 1902
7be8da07
AG
1903 if (need_peer_seq(mdev)) {
1904 spin_lock(&mdev->peer_seq_lock);
3c13b680
LE
1905 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1906 mdev->peer_seq = newest_peer_seq;
7be8da07 1907 spin_unlock(&mdev->peer_seq_lock);
3c13b680
LE
1908 /* wake up only if we actually changed mdev->peer_seq */
1909 if (peer_seq == newest_peer_seq)
7be8da07
AG
1910 wake_up(&mdev->seq_wait);
1911 }
3e394da1
AG
1912}
1913
b411b363
PR
1914/* Called from receive_Data.
1915 * Synchronize packets on sock with packets on msock.
1916 *
 1917 * This is here so that even when a P_DATA packet traveling via sock overtakes an Ack
 1918 * packet traveling on msock, they are still processed in the order in which they
 1919 * were sent.
1920 *
1921 * Note: we don't care for Ack packets overtaking P_DATA packets.
1922 *
 1923 * In case packet_seq is larger than mdev->peer_seq, there are still
 1924 * outstanding packets on the msock. We wait for them to arrive.
 1925 * In case this is the logically next packet, we update mdev->peer_seq
 1926 * ourselves. Correctly handles 32bit wrap around.
1927 *
 1928 * Assume we have a 10 GBit connection, that is about 1<<30 bytes per second,
 1929 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 1930 * for the 24bit wrap (historical atomic_t guarantee on some archs), and
 1931 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1932 *
1933 * returns 0 if we may process the packet,
1934 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
7be8da07 1935static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
b411b363
PR
1936{
1937 DEFINE_WAIT(wait);
b411b363 1938 long timeout;
7be8da07
AG
1939 int ret;
1940
1941 if (!need_peer_seq(mdev))
1942 return 0;
1943
b411b363
PR
1944 spin_lock(&mdev->peer_seq_lock);
1945 for (;;) {
7be8da07
AG
1946 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1947 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1948 ret = 0;
b411b363 1949 break;
7be8da07 1950 }
b411b363
PR
1951 if (signal_pending(current)) {
1952 ret = -ERESTARTSYS;
1953 break;
1954 }
7be8da07 1955 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
b411b363 1956 spin_unlock(&mdev->peer_seq_lock);
44ed167d
PR
1957 rcu_read_lock();
1958 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1959 rcu_read_unlock();
71b1c1eb 1960 timeout = schedule_timeout(timeout);
b411b363 1961 spin_lock(&mdev->peer_seq_lock);
7be8da07 1962 if (!timeout) {
b411b363 1963 ret = -ETIMEDOUT;
71b1c1eb 1964 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
b411b363
PR
1965 break;
1966 }
1967 }
b411b363 1968 spin_unlock(&mdev->peer_seq_lock);
7be8da07 1969 finish_wait(&mdev->seq_wait, &wait);
b411b363
PR
1970 return ret;
1971}
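/*
 * Illustrative note, not part of the driver: the exit condition of the wait
 * loop above, !seq_greater(peer_seq - 1, mdev->peer_seq), is just a
 * wrap-safe way of writing "peer_seq <= mdev->peer_seq + 1", i.e. the packet
 * is either old or the logically next one and may be processed.
 */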
1972
688593c5
LE
1973/* see also bio_flags_to_wire()
1974 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1975 * flags and back. We may replicate to other kernel versions. */
1976static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
76d2e7ec 1977{
688593c5
LE
1978 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1979 (dpf & DP_FUA ? REQ_FUA : 0) |
1980 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1981 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
1982}
1983
7be8da07
AG
1984static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1985 unsigned int size)
1986{
1987 struct drbd_interval *i;
1988
1989 repeat:
1990 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1991 struct drbd_request *req;
1992 struct bio_and_error m;
1993
1994 if (!i->local)
1995 continue;
1996 req = container_of(i, struct drbd_request, i);
1997 if (!(req->rq_state & RQ_POSTPONED))
1998 continue;
1999 req->rq_state &= ~RQ_POSTPONED;
2000 __req_mod(req, NEG_ACKED, &m);
2001 spin_unlock_irq(&mdev->tconn->req_lock);
2002 if (m.bio)
2003 complete_master_bio(mdev, &m);
2004 spin_lock_irq(&mdev->tconn->req_lock);
2005 goto repeat;
2006 }
2007}
2008
2009static int handle_write_conflicts(struct drbd_conf *mdev,
2010 struct drbd_peer_request *peer_req)
2011{
2012 struct drbd_tconn *tconn = mdev->tconn;
2013 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
2014 sector_t sector = peer_req->i.sector;
2015 const unsigned int size = peer_req->i.size;
2016 struct drbd_interval *i;
2017 bool equal;
2018 int err;
2019
2020 /*
2021 * Inserting the peer request into the write_requests tree will prevent
2022 * new conflicting local requests from being added.
2023 */
2024 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2025
2026 repeat:
2027 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2028 if (i == &peer_req->i)
2029 continue;
2030
2031 if (!i->local) {
2032 /*
2033 * Our peer has sent a conflicting remote request; this
2034 * should not happen in a two-node setup. Wait for the
2035 * earlier peer request to complete.
2036 */
2037 err = drbd_wait_misc(mdev, i);
2038 if (err)
2039 goto out;
2040 goto repeat;
2041 }
2042
2043 equal = i->sector == sector && i->size == size;
2044 if (resolve_conflicts) {
2045 /*
2046 * If the peer request is fully contained within the
2047 * overlapping request, it can be discarded; otherwise,
2048 * it will be retried once all overlapping requests
2049 * have completed.
2050 */
2051 bool discard = i->sector <= sector && i->sector +
2052 (i->size >> 9) >= sector + (size >> 9);
2053
2054 if (!equal)
2055 dev_alert(DEV, "Concurrent writes detected: "
2056 "local=%llus +%u, remote=%llus +%u, "
2057 "assuming %s came first\n",
2058 (unsigned long long)i->sector, i->size,
2059 (unsigned long long)sector, size,
2060 discard ? "local" : "remote");
2061
2062 inc_unacked(mdev);
2063 peer_req->w.cb = discard ? e_send_discard_write :
2064 e_send_retry_write;
2065 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2066 wake_asender(mdev->tconn);
2067
2068 err = -ENOENT;
2069 goto out;
2070 } else {
2071 struct drbd_request *req =
2072 container_of(i, struct drbd_request, i);
2073
2074 if (!equal)
2075 dev_alert(DEV, "Concurrent writes detected: "
2076 "local=%llus +%u, remote=%llus +%u\n",
2077 (unsigned long long)i->sector, i->size,
2078 (unsigned long long)sector, size);
2079
2080 if (req->rq_state & RQ_LOCAL_PENDING ||
2081 !(req->rq_state & RQ_POSTPONED)) {
2082 /*
2083 * Wait for the node with the discard flag to
2084 * decide if this request will be discarded or
2085 * retried. Requests that are discarded will
2086 * disappear from the write_requests tree.
2087 *
2088 * In addition, wait for the conflicting
2089 * request to finish locally before submitting
2090 * the conflicting peer request.
2091 */
2092 err = drbd_wait_misc(mdev, &req->i);
2093 if (err) {
2094 _conn_request_state(mdev->tconn,
2095 NS(conn, C_TIMEOUT),
2096 CS_HARD);
2097 fail_postponed_requests(mdev, sector, size);
2098 goto out;
2099 }
2100 goto repeat;
2101 }
2102 /*
2103 * Remember to restart the conflicting requests after
2104 * the new peer request has completed.
2105 */
2106 peer_req->flags |= EE_RESTART_REQUESTS;
2107 }
2108 }
2109 err = 0;
2110
2111 out:
2112 if (err)
2113 drbd_remove_epoch_entry_interval(mdev, peer_req);
2114 return err;
2115}
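/*
 * Illustrative sketch, not part of the driver: the "discard" decision in
 * handle_write_conflicts() above is an interval containment check -- the
 * peer request may only be discarded if the conflicting local request fully
 * covers it.  Written in isolation (sizes in bytes, sectors of 512 bytes):
 */
#if 0	/* example only, never built */
static bool local_fully_covers_peer(sector_t l_sector, unsigned int l_size,
				    sector_t p_sector, unsigned int p_size)
{
	return l_sector <= p_sector &&
	       l_sector + (l_size >> 9) >= p_sector + (p_size >> 9);
}
#endif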
2116
b411b363 2117/* mirrored write */
4a76b161 2118static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 2119{
4a76b161 2120 struct drbd_conf *mdev;
b411b363 2121 sector_t sector;
db830c46 2122 struct drbd_peer_request *peer_req;
e658983a 2123 struct p_data *p = pi->data;
7be8da07 2124 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
2125 int rw = WRITE;
2126 u32 dp_flags;
302bdeae 2127 int err, tp;
b411b363 2128
4a76b161
AG
2129 mdev = vnr_to_mdev(tconn, pi->vnr);
2130 if (!mdev)
2131 return -EIO;
2132
7be8da07 2133 if (!get_ldev(mdev)) {
82bc0194
AG
2134 int err2;
2135
7be8da07 2136 err = wait_for_and_update_peer_seq(mdev, peer_seq);
e2857216 2137 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
12038a3a 2138 atomic_inc(&tconn->current_epoch->epoch_size);
e2857216 2139 err2 = drbd_drain_block(mdev, pi->size);
82bc0194
AG
2140 if (!err)
2141 err = err2;
2142 return err;
b411b363
PR
2143 }
2144
fcefa62e
AG
2145 /*
2146 * Corresponding put_ldev done either below (on various errors), or in
2147 * drbd_peer_request_endio, if we successfully submit the data at the
2148 * end of this function.
2149 */
b411b363
PR
2150
2151 sector = be64_to_cpu(p->sector);
e2857216 2152 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
db830c46 2153 if (!peer_req) {
b411b363 2154 put_ldev(mdev);
82bc0194 2155 return -EIO;
b411b363
PR
2156 }
2157
db830c46 2158 peer_req->w.cb = e_end_block;
b411b363 2159
688593c5
LE
2160 dp_flags = be32_to_cpu(p->dp_flags);
2161 rw |= wire_flags_to_bio(mdev, dp_flags);
2162
2163 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2164 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2165
12038a3a
PR
2166 spin_lock(&tconn->epoch_lock);
2167 peer_req->epoch = tconn->current_epoch;
db830c46
AG
2168 atomic_inc(&peer_req->epoch->epoch_size);
2169 atomic_inc(&peer_req->epoch->active);
12038a3a 2170 spin_unlock(&tconn->epoch_lock);
b411b363 2171
302bdeae
PR
2172 rcu_read_lock();
2173 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2174 rcu_read_unlock();
2175 if (tp) {
2176 peer_req->flags |= EE_IN_INTERVAL_TREE;
7be8da07
AG
2177 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2178 if (err)
b411b363 2179 goto out_interrupted;
87eeee41 2180 spin_lock_irq(&mdev->tconn->req_lock);
7be8da07
AG
2181 err = handle_write_conflicts(mdev, peer_req);
2182 if (err) {
2183 spin_unlock_irq(&mdev->tconn->req_lock);
2184 if (err == -ENOENT) {
b411b363 2185 put_ldev(mdev);
82bc0194 2186 return 0;
b411b363 2187 }
7be8da07 2188 goto out_interrupted;
b411b363 2189 }
7be8da07
AG
2190 } else
2191 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2192 list_add(&peer_req->w.list, &mdev->active_ee);
87eeee41 2193 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2194
303d1448 2195 if (mdev->tconn->agreed_pro_version < 100) {
44ed167d
PR
2196 rcu_read_lock();
2197 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
303d1448
PR
2198 case DRBD_PROT_C:
2199 dp_flags |= DP_SEND_WRITE_ACK;
2200 break;
2201 case DRBD_PROT_B:
2202 dp_flags |= DP_SEND_RECEIVE_ACK;
2203 break;
2204 }
44ed167d 2205 rcu_read_unlock();
303d1448
PR
2206 }
2207
2208 if (dp_flags & DP_SEND_WRITE_ACK) {
2209 peer_req->flags |= EE_SEND_WRITE_ACK;
b411b363
PR
2210 inc_unacked(mdev);
2211 /* corresponding dec_unacked() in e_end_block()
 2212	 * respectively in _drbd_clear_done_ee */
303d1448
PR
2213 }
2214
2215 if (dp_flags & DP_SEND_RECEIVE_ACK) {
b411b363
PR
2216 /* I really don't like it that the receiver thread
2217 * sends on the msock, but anyways */
db830c46 2218 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
b411b363
PR
2219 }
2220
6719fb03 2221 if (mdev->state.pdsk < D_INCONSISTENT) {
b411b363 2222	/* In case we have the only disk of the cluster, note the area as out of sync */
db830c46
AG
2223 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2224 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2225 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
181286ad 2226 drbd_al_begin_io(mdev, &peer_req->i);
b411b363
PR
2227 }
2228
82bc0194
AG
2229 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2230 if (!err)
2231 return 0;
b411b363 2232
10f6d992
LE
2233 /* don't care for the reason here */
2234 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 2235 spin_lock_irq(&mdev->tconn->req_lock);
db830c46
AG
2236 list_del(&peer_req->w.list);
2237 drbd_remove_epoch_entry_interval(mdev, peer_req);
87eeee41 2238 spin_unlock_irq(&mdev->tconn->req_lock);
db830c46 2239 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
181286ad 2240 drbd_al_complete_io(mdev, &peer_req->i);
22cc37a9 2241
b411b363 2242out_interrupted:
1e9dd291 2243 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
b411b363 2244 put_ldev(mdev);
3967deb1 2245 drbd_free_peer_req(mdev, peer_req);
82bc0194 2246 return err;
b411b363
PR
2247}
2248
0f0601f4
LE
2249/* We may throttle resync, if the lower device seems to be busy,
2250 * and current sync rate is above c_min_rate.
2251 *
 2252 * To decide whether or not the lower device is busy, we use a scheme similar
 2253 * to MD RAID's is_mddev_idle(): if the partition stats reveal significant
 2254 * activity (more than 64 sectors) that we cannot account for with our own
 2255 * resync activity, it obviously is "busy".
2256 *
2257 * The current sync rate used here uses only the most recent two step marks,
2258 * to have a short time average so we can react faster.
2259 */
e3555d85 2260int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
0f0601f4
LE
2261{
2262 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2263 unsigned long db, dt, dbdt;
e3555d85 2264 struct lc_element *tmp;
0f0601f4
LE
2265 int curr_events;
2266 int throttle = 0;
daeda1cc
PR
2267 unsigned int c_min_rate;
2268
2269 rcu_read_lock();
2270 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2271 rcu_read_unlock();
0f0601f4
LE
2272
2273 /* feature disabled? */
daeda1cc 2274 if (c_min_rate == 0)
0f0601f4
LE
2275 return 0;
2276
e3555d85
PR
2277 spin_lock_irq(&mdev->al_lock);
2278 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2279 if (tmp) {
2280 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2281 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2282 spin_unlock_irq(&mdev->al_lock);
2283 return 0;
2284 }
2285 /* Do not slow down if app IO is already waiting for this extent */
2286 }
2287 spin_unlock_irq(&mdev->al_lock);
2288
0f0601f4
LE
2289 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2290 (int)part_stat_read(&disk->part0, sectors[1]) -
2291 atomic_read(&mdev->rs_sect_ev);
e3555d85 2292
0f0601f4
LE
2293 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2294 unsigned long rs_left;
2295 int i;
2296
2297 mdev->rs_last_events = curr_events;
2298
2299 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2300 * approx. */
2649f080
LE
2301 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2302
2303 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2304 rs_left = mdev->ov_left;
2305 else
2306 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
0f0601f4
LE
2307
2308 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2309 if (!dt)
2310 dt++;
2311 db = mdev->rs_mark_left[i] - rs_left;
2312 dbdt = Bit2KB(db/dt);
2313
daeda1cc 2314 if (dbdt > c_min_rate)
0f0601f4
LE
2315 throttle = 1;
2316 }
2317 return throttle;
2318}
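/*
 * Illustrative sketch, not part of the driver: the throttle decision above
 * compares an on-the-fly rate estimate -- bitmap bits cleared since the
 * second-newest sync mark, converted to KiB and divided by the elapsed
 * seconds -- against c_min_rate.  A stand-alone version of that estimate
 * (assuming the usual 4 KiB of data per bitmap bit) might read:
 */
#if 0	/* example only, never built */
static unsigned long resync_rate_kb_per_sec(unsigned long bits_cleared,
					    unsigned long seconds)
{
	return (bits_cleared * 4) / (seconds ?: 1);
}
#endif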
2319
2320
4a76b161 2321static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 2322{
4a76b161 2323 struct drbd_conf *mdev;
b411b363 2324 sector_t sector;
4a76b161 2325 sector_t capacity;
db830c46 2326 struct drbd_peer_request *peer_req;
b411b363 2327 struct digest_info *di = NULL;
b18b37be 2328 int size, verb;
b411b363 2329 unsigned int fault_type;
e658983a 2330 struct p_block_req *p = pi->data;
4a76b161
AG
2331
2332 mdev = vnr_to_mdev(tconn, pi->vnr);
2333 if (!mdev)
2334 return -EIO;
2335 capacity = drbd_get_capacity(mdev->this_bdev);
b411b363
PR
2336
2337 sector = be64_to_cpu(p->sector);
2338 size = be32_to_cpu(p->blksize);
2339
c670a398 2340 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
b411b363
PR
2341 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2342 (unsigned long long)sector, size);
82bc0194 2343 return -EINVAL;
b411b363
PR
2344 }
2345 if (sector + (size>>9) > capacity) {
2346 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2347 (unsigned long long)sector, size);
82bc0194 2348 return -EINVAL;
b411b363
PR
2349 }
2350
2351 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
b18b37be 2352 verb = 1;
e2857216 2353 switch (pi->cmd) {
b18b37be
PR
2354 case P_DATA_REQUEST:
2355 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2356 break;
2357 case P_RS_DATA_REQUEST:
2358 case P_CSUM_RS_REQUEST:
2359 case P_OV_REQUEST:
2360 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2361 break;
2362 case P_OV_REPLY:
2363 verb = 0;
2364 dec_rs_pending(mdev);
2365 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2366 break;
2367 default:
49ba9b1b 2368 BUG();
b18b37be
PR
2369 }
2370 if (verb && __ratelimit(&drbd_ratelimit_state))
b411b363
PR
2371 dev_err(DEV, "Can not satisfy peer's read request, "
2372 "no local data.\n");
b18b37be 2373
a821cc4a 2374	/* drain possible payload */
e2857216 2375 return drbd_drain_block(mdev, pi->size);
b411b363
PR
2376 }
2377
2378 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2379 * "criss-cross" setup, that might cause write-out on some other DRBD,
2380 * which in turn might block on the other node at this very place. */
0db55363 2381 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
db830c46 2382 if (!peer_req) {
b411b363 2383 put_ldev(mdev);
82bc0194 2384 return -ENOMEM;
b411b363
PR
2385 }
2386
e2857216 2387 switch (pi->cmd) {
b411b363 2388 case P_DATA_REQUEST:
db830c46 2389 peer_req->w.cb = w_e_end_data_req;
b411b363 2390 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2391 /* application IO, don't drbd_rs_begin_io */
2392 goto submit;
2393
b411b363 2394 case P_RS_DATA_REQUEST:
db830c46 2395 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2396 fault_type = DRBD_FAULT_RS_RD;
5f9915bb
LE
2397 /* used in the sector offset progress display */
2398 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2399 break;
2400
2401 case P_OV_REPLY:
2402 case P_CSUM_RS_REQUEST:
2403 fault_type = DRBD_FAULT_RS_RD;
e2857216 2404 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
b411b363
PR
2405 if (!di)
2406 goto out_free_e;
2407
e2857216 2408 di->digest_size = pi->size;
b411b363
PR
2409 di->digest = (((char *)di)+sizeof(struct digest_info));
2410
db830c46
AG
2411 peer_req->digest = di;
2412 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2413
e2857216 2414 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
b411b363
PR
2415 goto out_free_e;
2416
e2857216 2417 if (pi->cmd == P_CSUM_RS_REQUEST) {
31890f4a 2418 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
db830c46 2419 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb
LE
2420 /* used in the sector offset progress display */
2421 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
e2857216 2422 } else if (pi->cmd == P_OV_REPLY) {
2649f080
LE
2423 /* track progress, we may need to throttle */
2424 atomic_add(size >> 9, &mdev->rs_sect_in);
db830c46 2425 peer_req->w.cb = w_e_end_ov_reply;
b411b363 2426 dec_rs_pending(mdev);
0f0601f4
LE
2427 /* drbd_rs_begin_io done when we sent this request,
2428 * but accounting still needs to be done. */
2429 goto submit_for_resync;
b411b363
PR
2430 }
2431 break;
2432
2433 case P_OV_REQUEST:
b411b363 2434 if (mdev->ov_start_sector == ~(sector_t)0 &&
31890f4a 2435 mdev->tconn->agreed_pro_version >= 90) {
de228bba
LE
2436 unsigned long now = jiffies;
2437 int i;
b411b363
PR
2438 mdev->ov_start_sector = sector;
2439 mdev->ov_position = sector;
30b743a2
LE
2440 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2441 mdev->rs_total = mdev->ov_left;
de228bba
LE
2442 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2443 mdev->rs_mark_left[i] = mdev->ov_left;
2444 mdev->rs_mark_time[i] = now;
2445 }
b411b363
PR
2446 dev_info(DEV, "Online Verify start sector: %llu\n",
2447 (unsigned long long)sector);
2448 }
db830c46 2449 peer_req->w.cb = w_e_end_ov_req;
b411b363 2450 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2451 break;
2452
b411b363 2453 default:
49ba9b1b 2454 BUG();
b411b363
PR
2455 }
2456
0f0601f4
LE
2457 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
 2458 * wrt the receiver, but that is not as straightforward as it may seem.
 2459 * Various places in the resync start and stop logic assume resync
 2460 * requests are processed in order; requeuing this on the worker thread
 2461 * would introduce a bunch of new code for synchronization between threads.
2462 *
2463 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2464 * "forever", throttling after drbd_rs_begin_io will lock that extent
2465 * for application writes for the same time. For now, just throttle
2466 * here, where the rest of the code expects the receiver to sleep for
2467 * a while, anyways.
2468 */
2469
2470 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2471 * this defers syncer requests for some time, before letting at least
 2472 * one request through. The resync controller on the receiving side
2473 * will adapt to the incoming rate accordingly.
2474 *
2475 * We cannot throttle here if remote is Primary/SyncTarget:
2476 * we would also throttle its application reads.
2477 * In that case, throttling is done on the SyncTarget only.
2478 */
e3555d85
PR
2479 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2480 schedule_timeout_uninterruptible(HZ/10);
2481 if (drbd_rs_begin_io(mdev, sector))
80a40e43 2482 goto out_free_e;
b411b363 2483
0f0601f4
LE
2484submit_for_resync:
2485 atomic_add(size >> 9, &mdev->rs_sect_ev);
2486
80a40e43 2487submit:
b411b363 2488 inc_unacked(mdev);
87eeee41 2489 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2490 list_add_tail(&peer_req->w.list, &mdev->read_ee);
87eeee41 2491 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2492
fbe29dec 2493 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
82bc0194 2494 return 0;
b411b363 2495
10f6d992
LE
2496 /* don't care for the reason here */
2497 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 2498 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2499 list_del(&peer_req->w.list);
87eeee41 2500 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9
LE
2501 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2502
b411b363 2503out_free_e:
b411b363 2504 put_ldev(mdev);
3967deb1 2505 drbd_free_peer_req(mdev, peer_req);
82bc0194 2506 return -EIO;
b411b363
PR
2507}
2508
2509static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2510{
2511 int self, peer, rv = -100;
2512 unsigned long ch_self, ch_peer;
44ed167d 2513 enum drbd_after_sb_p after_sb_0p;
b411b363
PR
2514
2515 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2516 peer = mdev->p_uuid[UI_BITMAP] & 1;
2517
2518 ch_peer = mdev->p_uuid[UI_SIZE];
2519 ch_self = mdev->comm_bm_set;
2520
44ed167d
PR
2521 rcu_read_lock();
2522 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2523 rcu_read_unlock();
2524 switch (after_sb_0p) {
b411b363
PR
2525 case ASB_CONSENSUS:
2526 case ASB_DISCARD_SECONDARY:
2527 case ASB_CALL_HELPER:
44ed167d 2528 case ASB_VIOLENTLY:
b411b363
PR
2529 dev_err(DEV, "Configuration error.\n");
2530 break;
2531 case ASB_DISCONNECT:
2532 break;
2533 case ASB_DISCARD_YOUNGER_PRI:
2534 if (self == 0 && peer == 1) {
2535 rv = -1;
2536 break;
2537 }
2538 if (self == 1 && peer == 0) {
2539 rv = 1;
2540 break;
2541 }
2542 /* Else fall through to one of the other strategies... */
2543 case ASB_DISCARD_OLDER_PRI:
2544 if (self == 0 && peer == 1) {
2545 rv = 1;
2546 break;
2547 }
2548 if (self == 1 && peer == 0) {
2549 rv = -1;
2550 break;
2551 }
2552 /* Else fall through to one of the other strategies... */
ad19bf6e 2553 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
b411b363
PR
2554 "Using discard-least-changes instead\n");
2555 case ASB_DISCARD_ZERO_CHG:
2556 if (ch_peer == 0 && ch_self == 0) {
25703f83 2557 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2558 ? -1 : 1;
2559 break;
2560 } else {
2561 if (ch_peer == 0) { rv = 1; break; }
2562 if (ch_self == 0) { rv = -1; break; }
2563 }
44ed167d 2564 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
b411b363
PR
2565 break;
2566 case ASB_DISCARD_LEAST_CHG:
2567 if (ch_self < ch_peer)
2568 rv = -1;
2569 else if (ch_self > ch_peer)
2570 rv = 1;
2571 else /* ( ch_self == ch_peer ) */
2572 /* Well, then use something else. */
25703f83 2573 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2574 ? -1 : 1;
2575 break;
2576 case ASB_DISCARD_LOCAL:
2577 rv = -1;
2578 break;
2579 case ASB_DISCARD_REMOTE:
2580 rv = 1;
2581 }
2582
2583 return rv;
2584}
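/*
 * Illustrative note, not part of the driver: the after-split-brain recovery
 * helpers share a small return convention -- a negative value means "this
 * node discards its data and becomes sync target", a positive value means
 * the peer does, and -100 means no automatic decision could be reached.
 */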
2585
2586static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2587{
6184ea21 2588 int hg, rv = -100;
44ed167d 2589 enum drbd_after_sb_p after_sb_1p;
b411b363 2590
44ed167d
PR
2591 rcu_read_lock();
2592 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2593 rcu_read_unlock();
2594 switch (after_sb_1p) {
b411b363
PR
2595 case ASB_DISCARD_YOUNGER_PRI:
2596 case ASB_DISCARD_OLDER_PRI:
2597 case ASB_DISCARD_LEAST_CHG:
2598 case ASB_DISCARD_LOCAL:
2599 case ASB_DISCARD_REMOTE:
44ed167d 2600 case ASB_DISCARD_ZERO_CHG:
b411b363
PR
2601 dev_err(DEV, "Configuration error.\n");
2602 break;
2603 case ASB_DISCONNECT:
2604 break;
2605 case ASB_CONSENSUS:
2606 hg = drbd_asb_recover_0p(mdev);
2607 if (hg == -1 && mdev->state.role == R_SECONDARY)
2608 rv = hg;
2609 if (hg == 1 && mdev->state.role == R_PRIMARY)
2610 rv = hg;
2611 break;
2612 case ASB_VIOLENTLY:
2613 rv = drbd_asb_recover_0p(mdev);
2614 break;
2615 case ASB_DISCARD_SECONDARY:
2616 return mdev->state.role == R_PRIMARY ? 1 : -1;
2617 case ASB_CALL_HELPER:
2618 hg = drbd_asb_recover_0p(mdev);
2619 if (hg == -1 && mdev->state.role == R_PRIMARY) {
bb437946
AG
2620 enum drbd_state_rv rv2;
2621
2622 drbd_set_role(mdev, R_SECONDARY, 0);
b411b363
PR
2623 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2624 * we might be here in C_WF_REPORT_PARAMS which is transient.
2625 * we do not need to wait for the after state change work either. */
bb437946
AG
2626 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2627 if (rv2 != SS_SUCCESS) {
b411b363
PR
2628 drbd_khelper(mdev, "pri-lost-after-sb");
2629 } else {
2630 dev_warn(DEV, "Successfully gave up primary role.\n");
2631 rv = hg;
2632 }
2633 } else
2634 rv = hg;
2635 }
2636
2637 return rv;
2638}
2639
2640static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2641{
6184ea21 2642 int hg, rv = -100;
44ed167d 2643 enum drbd_after_sb_p after_sb_2p;
b411b363 2644
44ed167d
PR
2645 rcu_read_lock();
2646 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2647 rcu_read_unlock();
2648 switch (after_sb_2p) {
b411b363
PR
2649 case ASB_DISCARD_YOUNGER_PRI:
2650 case ASB_DISCARD_OLDER_PRI:
2651 case ASB_DISCARD_LEAST_CHG:
2652 case ASB_DISCARD_LOCAL:
2653 case ASB_DISCARD_REMOTE:
2654 case ASB_CONSENSUS:
2655 case ASB_DISCARD_SECONDARY:
44ed167d 2656 case ASB_DISCARD_ZERO_CHG:
b411b363
PR
2657 dev_err(DEV, "Configuration error.\n");
2658 break;
2659 case ASB_VIOLENTLY:
2660 rv = drbd_asb_recover_0p(mdev);
2661 break;
2662 case ASB_DISCONNECT:
2663 break;
2664 case ASB_CALL_HELPER:
2665 hg = drbd_asb_recover_0p(mdev);
2666 if (hg == -1) {
bb437946
AG
2667 enum drbd_state_rv rv2;
2668
b411b363
PR
2669 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2670 * we might be here in C_WF_REPORT_PARAMS which is transient.
2671 * we do not need to wait for the after state change work either. */
bb437946
AG
2672 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2673 if (rv2 != SS_SUCCESS) {
b411b363
PR
2674 drbd_khelper(mdev, "pri-lost-after-sb");
2675 } else {
2676 dev_warn(DEV, "Successfully gave up primary role.\n");
2677 rv = hg;
2678 }
2679 } else
2680 rv = hg;
2681 }
2682
2683 return rv;
2684}
2685
2686static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2687 u64 bits, u64 flags)
2688{
2689 if (!uuid) {
2690 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2691 return;
2692 }
2693 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2694 text,
2695 (unsigned long long)uuid[UI_CURRENT],
2696 (unsigned long long)uuid[UI_BITMAP],
2697 (unsigned long long)uuid[UI_HISTORY_START],
2698 (unsigned long long)uuid[UI_HISTORY_END],
2699 (unsigned long long)bits,
2700 (unsigned long long)flags);
2701}
2702
2703/*
2704 100 after split brain try auto recover
2705 2 C_SYNC_SOURCE set BitMap
2706 1 C_SYNC_SOURCE use BitMap
2707 0 no Sync
2708 -1 C_SYNC_TARGET use BitMap
2709 -2 C_SYNC_TARGET set BitMap
2710 -100 after split brain, disconnect
2711-1000 unrelated data
4a23f264
PR
2712-1091 requires proto 91
2713-1096 requires proto 96
b411b363
PR
2714 */
2715static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2716{
2717 u64 self, peer;
2718 int i, j;
2719
2720 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2721 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2722
2723 *rule_nr = 10;
2724 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2725 return 0;
2726
2727 *rule_nr = 20;
2728 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2729 peer != UUID_JUST_CREATED)
2730 return -2;
2731
2732 *rule_nr = 30;
2733 if (self != UUID_JUST_CREATED &&
2734 (peer == UUID_JUST_CREATED || peer == (u64)0))
2735 return 2;
2736
2737 if (self == peer) {
2738 int rct, dc; /* roles at crash time */
2739
2740 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2741
31890f4a 2742 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2743 return -1091;
b411b363
PR
2744
2745 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2746 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2747 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2748 drbd_uuid_set_bm(mdev, 0UL);
2749
2750 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2751 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2752 *rule_nr = 34;
2753 } else {
2754 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2755 *rule_nr = 36;
2756 }
2757
2758 return 1;
2759 }
2760
2761 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2762
31890f4a 2763 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2764 return -1091;
b411b363
PR
2765
2766 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2767 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2768 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2769
2770 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2771 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2772 mdev->p_uuid[UI_BITMAP] = 0UL;
2773
2774 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2775 *rule_nr = 35;
2776 } else {
2777 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2778 *rule_nr = 37;
2779 }
2780
2781 return -1;
2782 }
2783
2784 /* Common power [off|failure] */
2785 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2786 (mdev->p_uuid[UI_FLAGS] & 2);
2787 /* lowest bit is set when we were primary,
2788 * next bit (weight 2) is set when peer was primary */
2789 *rule_nr = 40;
2790
2791 switch (rct) {
2792 case 0: /* !self_pri && !peer_pri */ return 0;
2793 case 1: /* self_pri && !peer_pri */ return 1;
2794 case 2: /* !self_pri && peer_pri */ return -1;
2795 case 3: /* self_pri && peer_pri */
25703f83 2796 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2797 return dc ? -1 : 1;
2798 }
2799 }
2800
2801 *rule_nr = 50;
2802 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2803 if (self == peer)
2804 return -1;
2805
2806 *rule_nr = 51;
2807 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2808 if (self == peer) {
31890f4a 2809 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2810 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2811 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2812 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 2813			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 2814			   the peer's UUIDs that were made by its last start of a resync as sync source. */
2815
31890f4a 2816 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2817 return -1091;
b411b363
PR
2818
2819 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2820 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
4a23f264
PR
2821
 2822			dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2823 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2824
b411b363
PR
2825 return -1;
2826 }
2827 }
2828
2829 *rule_nr = 60;
2830 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2831 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2832 peer = mdev->p_uuid[i] & ~((u64)1);
2833 if (self == peer)
2834 return -2;
2835 }
2836
2837 *rule_nr = 70;
2838 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2839 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2840 if (self == peer)
2841 return 1;
2842
2843 *rule_nr = 71;
2844 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2845 if (self == peer) {
31890f4a 2846 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2847 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2848 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2849 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 2850			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 2851			   our UUIDs that were made by our last start of a resync as sync source. */
2852
31890f4a 2853 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2854 return -1091;
b411b363
PR
2855
2856 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2857 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2858
4a23f264 2859 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
b411b363
PR
2860 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2861 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2862
2863 return 1;
2864 }
2865 }
2866
2867
2868 *rule_nr = 80;
d8c2a36b 2869 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2870 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2871 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2872 if (self == peer)
2873 return 2;
2874 }
2875
2876 *rule_nr = 90;
2877 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2878 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2879 if (self == peer && self != ((u64)0))
2880 return 100;
2881
2882 *rule_nr = 100;
2883 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2884 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2885 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2886 peer = mdev->p_uuid[j] & ~((u64)1);
2887 if (self == peer)
2888 return -100;
2889 }
2890 }
2891
2892 return -1000;
2893}
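/*
 * Illustrative sketch, not part of the driver: rule 40 above packs the
 * crash-time roles into two bits -- bit 0 set means "this node was primary",
 * bit 1 set means "the peer was primary".  A hypothetical decoder of that
 * encoding, mirroring the switch in drbd_uuid_compare():
 */
#if 0	/* example only, never built */
static int crashed_primary_decision(int rct, bool discard_concurrent)
{
	switch (rct) {
	case 0: return 0;	/* neither was primary: no sync needed      */
	case 1: return 1;	/* only we were primary: become sync source */
	case 2: return -1;	/* only the peer was: become sync target    */
	case 3: return discard_concurrent ? -1 : 1;	/* both: tie-break */
	}
	return 0;
}
#endif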
2894
2895/* drbd_sync_handshake() returns the new conn state on success, or
2896 CONN_MASK (-1) on failure.
2897 */
2898static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2899 enum drbd_disk_state peer_disk) __must_hold(local)
2900{
b411b363
PR
2901 enum drbd_conns rv = C_MASK;
2902 enum drbd_disk_state mydisk;
44ed167d 2903 struct net_conf *nc;
6dff2902 2904 int hg, rule_nr, rr_conflict, tentative;
b411b363
PR
2905
2906 mydisk = mdev->state.disk;
2907 if (mydisk == D_NEGOTIATING)
2908 mydisk = mdev->new_state_tmp.disk;
2909
2910 dev_info(DEV, "drbd_sync_handshake:\n");
2911 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2912 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2913 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2914
2915 hg = drbd_uuid_compare(mdev, &rule_nr);
2916
2917 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2918
2919 if (hg == -1000) {
2920 dev_alert(DEV, "Unrelated data, aborting!\n");
2921 return C_MASK;
2922 }
4a23f264
PR
2923 if (hg < -1000) {
2924 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
b411b363
PR
2925 return C_MASK;
2926 }
2927
2928 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2929 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2930 int f = (hg == -100) || abs(hg) == 2;
2931 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2932 if (f)
2933 hg = hg*2;
2934 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2935 hg > 0 ? "source" : "target");
2936 }
2937
3a11a487
AG
2938 if (abs(hg) == 100)
2939 drbd_khelper(mdev, "initial-split-brain");
2940
44ed167d
PR
2941 rcu_read_lock();
2942 nc = rcu_dereference(mdev->tconn->net_conf);
2943
2944 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
b411b363
PR
2945 int pcount = (mdev->state.role == R_PRIMARY)
2946 + (peer_role == R_PRIMARY);
2947 int forced = (hg == -100);
2948
2949 switch (pcount) {
2950 case 0:
2951 hg = drbd_asb_recover_0p(mdev);
2952 break;
2953 case 1:
2954 hg = drbd_asb_recover_1p(mdev);
2955 break;
2956 case 2:
2957 hg = drbd_asb_recover_2p(mdev);
2958 break;
2959 }
2960 if (abs(hg) < 100) {
2961 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2962 "automatically solved. Sync from %s node\n",
2963 pcount, (hg < 0) ? "peer" : "this");
2964 if (forced) {
2965 dev_warn(DEV, "Doing a full sync, since"
2966 " UUIDs where ambiguous.\n");
2967 hg = hg*2;
2968 }
2969 }
2970 }
2971
2972 if (hg == -100) {
08b165ba 2973 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
b411b363 2974 hg = -1;
08b165ba 2975 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
b411b363
PR
2976 hg = 1;
2977
2978 if (abs(hg) < 100)
2979 dev_warn(DEV, "Split-Brain detected, manually solved. "
2980 "Sync from %s node\n",
2981 (hg < 0) ? "peer" : "this");
2982 }
44ed167d 2983 rr_conflict = nc->rr_conflict;
6dff2902 2984 tentative = nc->tentative;
44ed167d 2985 rcu_read_unlock();
b411b363
PR
2986
2987 if (hg == -100) {
580b9767
LE
2988 /* FIXME this log message is not correct if we end up here
2989 * after an attempted attach on a diskless node.
2990 * We just refuse to attach -- well, we drop the "connection"
2991 * to that disk, in a way... */
3a11a487 2992 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
b411b363
PR
2993 drbd_khelper(mdev, "split-brain");
2994 return C_MASK;
2995 }
2996
2997 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2998 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2999 return C_MASK;
3000 }
3001
3002 if (hg < 0 && /* by intention we do not use mydisk here. */
3003 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
44ed167d 3004 switch (rr_conflict) {
b411b363
PR
3005 case ASB_CALL_HELPER:
3006 drbd_khelper(mdev, "pri-lost");
3007 /* fall through */
3008 case ASB_DISCONNECT:
3009 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3010 return C_MASK;
3011 case ASB_VIOLENTLY:
3012 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3013 "assumption\n");
3014 }
3015 }
3016
6dff2902 3017 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
cf14c2e9
PR
3018 if (hg == 0)
3019 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3020 else
3021 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3022 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3023 abs(hg) >= 2 ? "full" : "bit-map based");
3024 return C_MASK;
3025 }
3026
b411b363
PR
3027 if (abs(hg) >= 2) {
3028 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
20ceb2b2
LE
3029 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3030 BM_LOCKED_SET_ALLOWED))
b411b363
PR
3031 return C_MASK;
3032 }
3033
3034 if (hg > 0) { /* become sync source. */
3035 rv = C_WF_BITMAP_S;
3036 } else if (hg < 0) { /* become sync target */
3037 rv = C_WF_BITMAP_T;
3038 } else {
3039 rv = C_CONNECTED;
3040 if (drbd_bm_total_weight(mdev)) {
3041 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3042 drbd_bm_total_weight(mdev));
3043 }
3044 }
3045
3046 return rv;
3047}
3048
f179d76d 3049static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3050{
3051 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3052 if (peer == ASB_DISCARD_REMOTE)
3053 return ASB_DISCARD_LOCAL;
b411b363
PR
3054
3055 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3056 if (peer == ASB_DISCARD_LOCAL)
3057 return ASB_DISCARD_REMOTE;
b411b363
PR
3058
3059 /* everything else is valid if they are equal on both sides. */
f179d76d 3060 return peer;
b411b363
PR
3061}
3062
e2857216 3063static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3064{
e658983a 3065 struct p_protocol *p = pi->data;
036b17ea
PR
3066 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3067 int p_proto, p_discard_my_data, p_two_primaries, cf;
3068 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3069 char integrity_alg[SHARED_SECRET_MAX] = "";
accdbcc5 3070 struct crypto_hash *peer_integrity_tfm = NULL;
7aca6c75 3071 void *int_dig_in = NULL, *int_dig_vv = NULL;
b411b363 3072
b411b363
PR
3073 p_proto = be32_to_cpu(p->protocol);
3074 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3075 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3076 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
b411b363 3077 p_two_primaries = be32_to_cpu(p->two_primaries);
cf14c2e9 3078 cf = be32_to_cpu(p->conn_flags);
6139f60d 3079 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
cf14c2e9 3080
86db0618
AG
3081 if (tconn->agreed_pro_version >= 87) {
3082 int err;
3083
88104ca4 3084 if (pi->size > sizeof(integrity_alg))
86db0618 3085 return -EIO;
88104ca4 3086 err = drbd_recv_all(tconn, integrity_alg, pi->size);
86db0618
AG
3087 if (err)
3088 return err;
036b17ea
PR
3089 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3090 }
88104ca4 3091
7d4c782c 3092 if (pi->cmd != P_PROTOCOL_UPDATE) {
fbc12f45 3093 clear_bit(CONN_DRY_RUN, &tconn->flags);
036b17ea 3094
fbc12f45
AG
3095 if (cf & CF_DRY_RUN)
3096 set_bit(CONN_DRY_RUN, &tconn->flags);
cf14c2e9 3097
fbc12f45
AG
3098 rcu_read_lock();
3099 nc = rcu_dereference(tconn->net_conf);
b411b363 3100
fbc12f45 3101 if (p_proto != nc->wire_protocol) {
d505d9be 3102 conn_err(tconn, "incompatible %s settings\n", "protocol");
fbc12f45
AG
3103 goto disconnect_rcu_unlock;
3104 }
44ed167d 3105
fbc12f45 3106 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
d505d9be 3107 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
fbc12f45
AG
3108 goto disconnect_rcu_unlock;
3109 }
b411b363 3110
fbc12f45 3111 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
d505d9be 3112 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
fbc12f45
AG
3113 goto disconnect_rcu_unlock;
3114 }
b411b363 3115
fbc12f45 3116 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
d505d9be 3117 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
fbc12f45
AG
3118 goto disconnect_rcu_unlock;
3119 }
b411b363 3120
fbc12f45 3121 if (p_discard_my_data && nc->discard_my_data) {
d505d9be 3122 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
fbc12f45
AG
3123 goto disconnect_rcu_unlock;
3124 }
b411b363 3125
fbc12f45 3126 if (p_two_primaries != nc->two_primaries) {
d505d9be 3127 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
fbc12f45
AG
3128 goto disconnect_rcu_unlock;
3129 }
b411b363 3130
fbc12f45 3131 if (strcmp(integrity_alg, nc->integrity_alg)) {
d505d9be 3132 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
fbc12f45
AG
3133 goto disconnect_rcu_unlock;
3134 }
b411b363 3135
fbc12f45 3136 rcu_read_unlock();
036b17ea 3137 }
7d4c782c
AG
3138
3139 if (integrity_alg[0]) {
3140 int hash_size;
3141
3142 /*
3143 * We can only change the peer data integrity algorithm
3144 * here. Changing our own data integrity algorithm
3145 * requires that we send a P_PROTOCOL_UPDATE packet at
3146 * the same time; otherwise, the peer has no way to
3147 * tell between which packets the algorithm should
3148 * change.
3149 */
3150
3151 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3152 if (!peer_integrity_tfm) {
3153 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3154 integrity_alg);
3155 goto disconnect;
3156 }
3157
3158 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3159 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3160 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3161 if (!(int_dig_in && int_dig_vv)) {
3162 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3163 goto disconnect;
3164 }
3165 }
3166
3167 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3168 if (!new_net_conf) {
3169 conn_err(tconn, "Allocation of new net_conf failed\n");
3170 goto disconnect;
3171 }
3172
3173 mutex_lock(&tconn->data.mutex);
3174 mutex_lock(&tconn->conf_update);
3175 old_net_conf = tconn->net_conf;
3176 *new_net_conf = *old_net_conf;
3177
3178 new_net_conf->wire_protocol = p_proto;
3179 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3180 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3181 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3182 new_net_conf->two_primaries = p_two_primaries;
3183
3184 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3185 mutex_unlock(&tconn->conf_update);
3186 mutex_unlock(&tconn->data.mutex);
3187
3188 crypto_free_hash(tconn->peer_integrity_tfm);
3189 kfree(tconn->int_dig_in);
3190 kfree(tconn->int_dig_vv);
3191 tconn->peer_integrity_tfm = peer_integrity_tfm;
3192 tconn->int_dig_in = int_dig_in;
3193 tconn->int_dig_vv = int_dig_vv;
3194
3195 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3196 conn_info(tconn, "peer data-integrity-alg: %s\n",
3197 integrity_alg[0] ? integrity_alg : "(none)");
3198
3199 synchronize_rcu();
3200 kfree(old_net_conf);
82bc0194 3201 return 0;
b411b363 3202
44ed167d
PR
3203disconnect_rcu_unlock:
3204 rcu_read_unlock();
b411b363 3205disconnect:
b792c35c 3206 crypto_free_hash(peer_integrity_tfm);
036b17ea
PR
3207 kfree(int_dig_in);
3208 kfree(int_dig_vv);
7204624c 3209 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3210 return -EIO;
b411b363
PR
3211}
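/*
 * Illustrative sketch, not part of the driver: the net_conf update in
 * receive_protocol() above follows the usual RCU copy-update pattern --
 * build a complete private copy, publish it with rcu_assign_pointer(),
 * then wait for all readers of the old copy before freeing it:
 */
#if 0	/* example only, never built */
static int update_net_conf_example(struct drbd_tconn *tconn)
{
	struct net_conf *old_net_conf, *new_net_conf;

	new_net_conf = kmalloc(sizeof(*new_net_conf), GFP_KERNEL);
	if (!new_net_conf)
		return -ENOMEM;

	mutex_lock(&tconn->conf_update);
	old_net_conf = tconn->net_conf;
	*new_net_conf = *old_net_conf;		/* start from current settings */
	/* ... modify new_net_conf ... */
	rcu_assign_pointer(tconn->net_conf, new_net_conf);
	mutex_unlock(&tconn->conf_update);

	synchronize_rcu();	/* no reader can still see old_net_conf */
	kfree(old_net_conf);
	return 0;
}
#endif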
3212
3213/* helper function
3214 * input: alg name, feature name
3215 * return: NULL (alg name was "")
3216 * ERR_PTR(error) if something goes wrong
3217 * or the crypto hash ptr, if it worked out ok. */
3218struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3219 const char *alg, const char *name)
3220{
3221 struct crypto_hash *tfm;
3222
3223 if (!alg[0])
3224 return NULL;
3225
3226 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3227 if (IS_ERR(tfm)) {
3228 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3229 alg, name, PTR_ERR(tfm));
3230 return tfm;
3231 }
b411b363
PR
3232 return tfm;
3233}
3234
4a76b161
AG
3235static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
3236{
3237 void *buffer = tconn->data.rbuf;
3238 int size = pi->size;
3239
3240 while (size) {
3241 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3242 s = drbd_recv(tconn, buffer, s);
3243 if (s <= 0) {
3244 if (s < 0)
3245 return s;
3246 break;
3247 }
3248 size -= s;
3249 }
3250 if (size)
3251 return -EIO;
3252 return 0;
3253}
3254
3255/*
3256 * config_unknown_volume - device configuration command for unknown volume
3257 *
3258 * When a device is added to an existing connection, the node on which the
3259 * device is added first will send configuration commands to its peer but the
3260 * peer will not know about the device yet. It will warn and ignore these
3261 * commands. Once the device is added on the second node, the second node will
3262 * send the same device configuration commands, but in the other direction.
3263 *
3264 * (We can also end up here if drbd is misconfigured.)
3265 */
3266static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3267{
2fcb8f30
AG
3268 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3269 cmdname(pi->cmd), pi->vnr);
4a76b161
AG
3270 return ignore_remaining_packet(tconn, pi);
3271}
3272
3273static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3274{
4a76b161 3275 struct drbd_conf *mdev;
e658983a 3276 struct p_rs_param_95 *p;
b411b363
PR
3277 unsigned int header_size, data_size, exp_max_sz;
3278 struct crypto_hash *verify_tfm = NULL;
3279 struct crypto_hash *csums_tfm = NULL;
2ec91e0e 3280 struct net_conf *old_net_conf, *new_net_conf = NULL;
813472ce 3281 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
4a76b161 3282 const int apv = tconn->agreed_pro_version;
813472ce 3283 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
778f271d 3284 int fifo_size = 0;
82bc0194 3285 int err;
b411b363 3286
4a76b161
AG
3287 mdev = vnr_to_mdev(tconn, pi->vnr);
3288 if (!mdev)
3289 return config_unknown_volume(tconn, pi);
3290
b411b363
PR
3291 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3292 : apv == 88 ? sizeof(struct p_rs_param)
3293 + SHARED_SECRET_MAX
8e26f9cc
PR
3294 : apv <= 94 ? sizeof(struct p_rs_param_89)
3295 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
b411b363 3296
e2857216 3297 if (pi->size > exp_max_sz) {
b411b363 3298 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
e2857216 3299 pi->size, exp_max_sz);
82bc0194 3300 return -EIO;
b411b363
PR
3301 }
3302
3303 if (apv <= 88) {
e658983a 3304 header_size = sizeof(struct p_rs_param);
e2857216 3305 data_size = pi->size - header_size;
8e26f9cc 3306 } else if (apv <= 94) {
e658983a 3307 header_size = sizeof(struct p_rs_param_89);
e2857216 3308 data_size = pi->size - header_size;
b411b363 3309 D_ASSERT(data_size == 0);
8e26f9cc 3310 } else {
e658983a 3311 header_size = sizeof(struct p_rs_param_95);
e2857216 3312 data_size = pi->size - header_size;
b411b363
PR
3313 D_ASSERT(data_size == 0);
3314 }
3315
3316 /* initialize verify_alg and csums_alg */
e658983a 3317 p = pi->data;
b411b363
PR
3318 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3319
e658983a 3320 err = drbd_recv_all(mdev->tconn, p, header_size);
82bc0194
AG
3321 if (err)
3322 return err;
b411b363 3323
daeda1cc
PR
3324 mutex_lock(&mdev->tconn->conf_update);
3325 old_net_conf = mdev->tconn->net_conf;
813472ce
PR
3326 if (get_ldev(mdev)) {
3327 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3328 if (!new_disk_conf) {
3329 put_ldev(mdev);
3330 mutex_unlock(&mdev->tconn->conf_update);
3331 dev_err(DEV, "Allocation of new disk_conf failed\n");
3332 return -ENOMEM;
3333 }
daeda1cc 3334
813472ce
PR
3335 old_disk_conf = mdev->ldev->disk_conf;
3336 *new_disk_conf = *old_disk_conf;
3337
6394b935 3338 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
813472ce 3339 }
daeda1cc 3340
b411b363
PR
3341 if (apv >= 88) {
3342 if (apv == 88) {
3343 if (data_size > SHARED_SECRET_MAX) {
3344 dev_err(DEV, "verify-alg too long, "
3345 "peer wants %u, accepting only %u byte\n",
3346 data_size, SHARED_SECRET_MAX);
813472ce
PR
3347 err = -EIO;
3348 goto reconnect;
b411b363
PR
3349 }
3350
82bc0194 3351 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
813472ce
PR
3352 if (err)
3353 goto reconnect;
b411b363
PR
3354 /* we expect NUL terminated string */
3355 /* but just in case someone tries to be evil */
3356 D_ASSERT(p->verify_alg[data_size-1] == 0);
3357 p->verify_alg[data_size-1] = 0;
3358
3359 } else /* apv >= 89 */ {
3360 /* we still expect NUL terminated strings */
3361 /* but just in case someone tries to be evil */
3362 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3363 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3364 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3365 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3366 }
3367
2ec91e0e 3368 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
b411b363
PR
3369 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3370 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3371 old_net_conf->verify_alg, p->verify_alg);
b411b363
PR
3372 goto disconnect;
3373 }
3374 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3375 p->verify_alg, "verify-alg");
3376 if (IS_ERR(verify_tfm)) {
3377 verify_tfm = NULL;
3378 goto disconnect;
3379 }
3380 }
3381
2ec91e0e 3382 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
b411b363
PR
3383 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3384 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3385 old_net_conf->csums_alg, p->csums_alg);
b411b363
PR
3386 goto disconnect;
3387 }
3388 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3389 p->csums_alg, "csums-alg");
3390 if (IS_ERR(csums_tfm)) {
3391 csums_tfm = NULL;
3392 goto disconnect;
3393 }
3394 }
3395
813472ce 3396 if (apv > 94 && new_disk_conf) {
daeda1cc
PR
3397 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3398 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3399 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3400 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
778f271d 3401
daeda1cc 3402 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
9958c857 3403 if (fifo_size != mdev->rs_plan_s->size) {
813472ce
PR
3404 new_plan = fifo_alloc(fifo_size);
3405 if (!new_plan) {
778f271d 3406 dev_err(DEV, "kmalloc of fifo_buffer failed");
f399002e 3407 put_ldev(mdev);
778f271d
PR
3408 goto disconnect;
3409 }
3410 }
8e26f9cc 3411 }
b411b363 3412
91fd4dad 3413 if (verify_tfm || csums_tfm) {
2ec91e0e
PR
3414 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3415 if (!new_net_conf) {
91fd4dad
PR
3416 dev_err(DEV, "Allocation of new net_conf failed\n");
3417 goto disconnect;
3418 }
3419
2ec91e0e 3420 *new_net_conf = *old_net_conf;
91fd4dad
PR
3421
3422 if (verify_tfm) {
2ec91e0e
PR
3423 strcpy(new_net_conf->verify_alg, p->verify_alg);
3424 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
91fd4dad
PR
3425 crypto_free_hash(mdev->tconn->verify_tfm);
3426 mdev->tconn->verify_tfm = verify_tfm;
3427 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3428 }
3429 if (csums_tfm) {
2ec91e0e
PR
3430 strcpy(new_net_conf->csums_alg, p->csums_alg);
3431 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
91fd4dad
PR
3432 crypto_free_hash(mdev->tconn->csums_tfm);
3433 mdev->tconn->csums_tfm = csums_tfm;
3434 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3435 }
2ec91e0e 3436 rcu_assign_pointer(tconn->net_conf, new_net_conf);
b411b363 3437 }
daeda1cc 3438 }
91fd4dad 3439
813472ce
PR
3440 if (new_disk_conf) {
3441 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3442 put_ldev(mdev);
3443 }
3444
3445 if (new_plan) {
3446 old_plan = mdev->rs_plan_s;
3447 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
b411b363 3448 }
daeda1cc
PR
3449
3450 mutex_unlock(&mdev->tconn->conf_update);
3451 synchronize_rcu();
3452 if (new_net_conf)
3453 kfree(old_net_conf);
3454 kfree(old_disk_conf);
813472ce 3455 kfree(old_plan);
daeda1cc 3456
82bc0194 3457 return 0;
b411b363 3458
813472ce
PR
3459reconnect:
3460 if (new_disk_conf) {
3461 put_ldev(mdev);
3462 kfree(new_disk_conf);
3463 }
3464 mutex_unlock(&mdev->tconn->conf_update);
3465 return -EIO;
3466
b411b363 3467disconnect:
813472ce
PR
3468 kfree(new_plan);
3469 if (new_disk_conf) {
3470 put_ldev(mdev);
3471 kfree(new_disk_conf);
3472 }
a0095508 3473 mutex_unlock(&mdev->tconn->conf_update);
b411b363
PR
3474 /* just for completeness: actually not needed,
3475 * as this is not reached if csums_tfm was ok. */
3476 crypto_free_hash(csums_tfm);
3477 /* but free the verify_tfm again, if csums_tfm did not work out */
3478 crypto_free_hash(verify_tfm);
38fa9988 3479 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3480 return -EIO;
b411b363
PR
3481}
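
The conf update above follows the kernel's usual RCU publish-then-free pattern: allocate a new struct, copy the old one while holding the conf_update mutex, modify it, publish it with rcu_assign_pointer(), and only kfree() the old struct after synchronize_rcu() has guaranteed no reader still sees it. A minimal sketch of that pattern, using hypothetical my_conf/my_conf_mutex names rather than drbd symbols:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_conf { int resync_rate; };
static struct my_conf __rcu *my_conf;
static DEFINE_MUTEX(my_conf_mutex);

static int my_conf_update(int new_rate)
{
	struct my_conf *new_c, *old_c;

	new_c = kzalloc(sizeof(*new_c), GFP_KERNEL);
	if (!new_c)
		return -ENOMEM;

	mutex_lock(&my_conf_mutex);		/* serialize writers */
	old_c = rcu_dereference_protected(my_conf,
			lockdep_is_held(&my_conf_mutex));
	if (old_c)
		*new_c = *old_c;		/* start from the old values */
	new_c->resync_rate = new_rate;
	rcu_assign_pointer(my_conf, new_c);	/* publish to readers */
	mutex_unlock(&my_conf_mutex);

	synchronize_rcu();			/* wait until old readers are done */
	kfree(old_c);				/* now nobody can still see old_c */
	return 0;
}

Readers pair rcu_read_lock()/rcu_dereference() around their access, exactly as receive_sizes() below does for mdev->ldev->disk_conf.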
3482
b411b363
PR
3483/* warn if the arguments differ by more than 12.5% */
3484static void warn_if_differ_considerably(struct drbd_conf *mdev,
3485 const char *s, sector_t a, sector_t b)
3486{
3487 sector_t d;
3488 if (a == 0 || b == 0)
3489 return;
3490 d = (a > b) ? (a - b) : (b - a);
3491 if (d > (a>>3) || d > (b>>3))
3492 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3493 (unsigned long long)a, (unsigned long long)b);
3494}
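
Here a>>3 is a/8, i.e. 12.5% of a. For example, with a = 1000 and b = 870 sectors, d = 130, which exceeds a>>3 = 125, so the warning is printed; with b = 900, d = 100 stays below both 125 and b>>3 = 112, and the sizes are considered close enough.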
3495
4a76b161 3496static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3497{
4a76b161 3498 struct drbd_conf *mdev;
e658983a 3499 struct p_sizes *p = pi->data;
b411b363 3500 enum determine_dev_size dd = unchanged;
b411b363
PR
3501 sector_t p_size, p_usize, my_usize;
3502 int ldsc = 0; /* local disk size changed */
e89b591c 3503 enum dds_flags ddsf;
b411b363 3504
4a76b161
AG
3505 mdev = vnr_to_mdev(tconn, pi->vnr);
3506 if (!mdev)
3507 return config_unknown_volume(tconn, pi);
3508
b411b363
PR
3509 p_size = be64_to_cpu(p->d_size);
3510 p_usize = be64_to_cpu(p->u_size);
3511
b411b363
PR
3512 /* just store the peer's disk size for now.
3513 * we still need to figure out whether we accept that. */
3514 mdev->p_size = p_size;
3515
b411b363 3516 if (get_ldev(mdev)) {
daeda1cc
PR
3517 rcu_read_lock();
3518 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3519 rcu_read_unlock();
3520
b411b363
PR
3521 warn_if_differ_considerably(mdev, "lower level device sizes",
3522 p_size, drbd_get_max_capacity(mdev->ldev));
3523 warn_if_differ_considerably(mdev, "user requested size",
daeda1cc 3524 p_usize, my_usize);
b411b363
PR
3525
3526 /* if this is the first connect, or an otherwise expected
3527 * param exchange, choose the minimum */
3528 if (mdev->state.conn == C_WF_REPORT_PARAMS)
daeda1cc 3529 p_usize = min_not_zero(my_usize, p_usize);
b411b363
PR
3530
3531 /* Never shrink a device with usable data during connect.
3532 But allow online shrinking if we are connected. */
ef5e44a6 3533 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
daeda1cc
PR
3534 drbd_get_capacity(mdev->this_bdev) &&
3535 mdev->state.disk >= D_OUTDATED &&
3536 mdev->state.conn < C_CONNECTED) {
b411b363 3537 dev_err(DEV, "The peer's disk size is too small!\n");
38fa9988 3538 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 3539 put_ldev(mdev);
82bc0194 3540 return -EIO;
b411b363 3541 }
daeda1cc
PR
3542
3543 if (my_usize != p_usize) {
3544 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3545
3546 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3547 if (!new_disk_conf) {
3548 dev_err(DEV, "Allocation of new disk_conf failed\n");
3549 put_ldev(mdev);
3550 return -ENOMEM;
3551 }
3552
3553 mutex_lock(&mdev->tconn->conf_update);
3554 old_disk_conf = mdev->ldev->disk_conf;
3555 *new_disk_conf = *old_disk_conf;
3556 new_disk_conf->disk_size = p_usize;
3557
3558 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3559 mutex_unlock(&mdev->tconn->conf_update);
3560 synchronize_rcu();
3561 kfree(old_disk_conf);
3562
 3563 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
 3564 (unsigned long)p_usize);
3565 }
3566
b411b363
PR
3567 put_ldev(mdev);
3568 }
b411b363 3569
e89b591c 3570 ddsf = be16_to_cpu(p->dds_flags);
b411b363 3571 if (get_ldev(mdev)) {
24c4830c 3572 dd = drbd_determine_dev_size(mdev, ddsf);
b411b363
PR
3573 put_ldev(mdev);
3574 if (dd == dev_size_error)
82bc0194 3575 return -EIO;
b411b363
PR
3576 drbd_md_sync(mdev);
3577 } else {
3578 /* I am diskless, need to accept the peer's size. */
3579 drbd_set_my_capacity(mdev, p_size);
3580 }
3581
99432fcc
PR
3582 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3583 drbd_reconsider_max_bio_size(mdev);
3584
b411b363
PR
3585 if (get_ldev(mdev)) {
3586 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3587 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3588 ldsc = 1;
3589 }
3590
b411b363
PR
3591 put_ldev(mdev);
3592 }
3593
3594 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3595 if (be64_to_cpu(p->c_size) !=
3596 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3597 /* we have different sizes, probably peer
3598 * needs to know my new size... */
e89b591c 3599 drbd_send_sizes(mdev, 0, ddsf);
b411b363
PR
3600 }
3601 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3602 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3603 if (mdev->state.pdsk >= D_INCONSISTENT &&
e89b591c
PR
3604 mdev->state.disk >= D_INCONSISTENT) {
3605 if (ddsf & DDSF_NO_RESYNC)
3606 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3607 else
3608 resync_after_online_grow(mdev);
3609 } else
b411b363
PR
3610 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3611 }
3612 }
3613
82bc0194 3614 return 0;
b411b363
PR
3615}
3616
4a76b161 3617static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3618{
4a76b161 3619 struct drbd_conf *mdev;
e658983a 3620 struct p_uuids *p = pi->data;
b411b363 3621 u64 *p_uuid;
62b0da3a 3622 int i, updated_uuids = 0;
b411b363 3623
4a76b161
AG
3624 mdev = vnr_to_mdev(tconn, pi->vnr);
3625 if (!mdev)
3626 return config_unknown_volume(tconn, pi);
3627
b411b363
PR
3628 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3629
3630 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3631 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3632
3633 kfree(mdev->p_uuid);
3634 mdev->p_uuid = p_uuid;
3635
3636 if (mdev->state.conn < C_CONNECTED &&
3637 mdev->state.disk < D_INCONSISTENT &&
3638 mdev->state.role == R_PRIMARY &&
3639 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3640 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3641 (unsigned long long)mdev->ed_uuid);
38fa9988 3642 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3643 return -EIO;
b411b363
PR
3644 }
3645
3646 if (get_ldev(mdev)) {
3647 int skip_initial_sync =
3648 mdev->state.conn == C_CONNECTED &&
31890f4a 3649 mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
3650 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3651 (p_uuid[UI_FLAGS] & 8);
3652 if (skip_initial_sync) {
3653 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3654 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3655 "clear_n_write from receive_uuids",
3656 BM_LOCKED_TEST_ALLOWED);
b411b363
PR
3657 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3658 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3659 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3660 CS_VERBOSE, NULL);
3661 drbd_md_sync(mdev);
62b0da3a 3662 updated_uuids = 1;
b411b363
PR
3663 }
3664 put_ldev(mdev);
18a50fa2
PR
3665 } else if (mdev->state.disk < D_INCONSISTENT &&
3666 mdev->state.role == R_PRIMARY) {
3667 /* I am a diskless primary, the peer just created a new current UUID
3668 for me. */
62b0da3a 3669 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
b411b363
PR
3670 }
3671
 3672 /* Before we test for the disk state, we should wait until any
 3673 ongoing cluster-wide state change has finished. That is important if
3674 we are primary and are detaching from our disk. We need to see the
3675 new disk state... */
8410da8f
PR
3676 mutex_lock(mdev->state_mutex);
3677 mutex_unlock(mdev->state_mutex);
b411b363 3678 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
62b0da3a
LE
3679 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3680
3681 if (updated_uuids)
3682 drbd_print_uuids(mdev, "receiver updated UUIDs to");
b411b363 3683
82bc0194 3684 return 0;
b411b363
PR
3685}
3686
3687/**
3688 * convert_state() - Converts the peer's view of the cluster state to our point of view
3689 * @ps: The state as seen by the peer.
3690 */
3691static union drbd_state convert_state(union drbd_state ps)
3692{
3693 union drbd_state ms;
3694
3695 static enum drbd_conns c_tab[] = {
369bea63 3696 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
b411b363
PR
3697 [C_CONNECTED] = C_CONNECTED,
3698
3699 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3700 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3701 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3702 [C_VERIFY_S] = C_VERIFY_T,
3703 [C_MASK] = C_MASK,
3704 };
3705
3706 ms.i = ps.i;
3707
3708 ms.conn = c_tab[ps.conn];
3709 ms.peer = ps.role;
3710 ms.role = ps.peer;
3711 ms.pdsk = ps.disk;
3712 ms.disk = ps.pdsk;
3713 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3714
3715 return ms;
3716}
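
As a concrete example of the mirroring above: if the peer reports (conn=C_STARTING_SYNC_S, role=R_PRIMARY, peer=R_SECONDARY, disk=D_UP_TO_DATE, pdsk=D_INCONSISTENT), the converted local view is (conn=C_STARTING_SYNC_T, role=R_SECONDARY, peer=R_PRIMARY, disk=D_INCONSISTENT, pdsk=D_UP_TO_DATE): roles and disk states swap sides, and the directional connection states are flipped through c_tab.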
3717
4a76b161 3718static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3719{
4a76b161 3720 struct drbd_conf *mdev;
e658983a 3721 struct p_req_state *p = pi->data;
b411b363 3722 union drbd_state mask, val;
bf885f8a 3723 enum drbd_state_rv rv;
b411b363 3724
4a76b161
AG
3725 mdev = vnr_to_mdev(tconn, pi->vnr);
3726 if (!mdev)
3727 return -EIO;
3728
b411b363
PR
3729 mask.i = be32_to_cpu(p->mask);
3730 val.i = be32_to_cpu(p->val);
3731
25703f83 3732 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
8410da8f 3733 mutex_is_locked(mdev->state_mutex)) {
b411b363 3734 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
82bc0194 3735 return 0;
b411b363
PR
3736 }
3737
3738 mask = convert_state(mask);
3739 val = convert_state(val);
3740
dfafcc8a
PR
3741 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3742 drbd_send_sr_reply(mdev, rv);
b411b363 3743
b411b363
PR
3744 drbd_md_sync(mdev);
3745
82bc0194 3746 return 0;
b411b363
PR
3747}
3748
e2857216 3749static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
dfafcc8a 3750{
e658983a 3751 struct p_req_state *p = pi->data;
dfafcc8a
PR
3752 union drbd_state mask, val;
3753 enum drbd_state_rv rv;
3754
3755 mask.i = be32_to_cpu(p->mask);
3756 val.i = be32_to_cpu(p->val);
3757
3758 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3759 mutex_is_locked(&tconn->cstate_mutex)) {
3760 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
82bc0194 3761 return 0;
dfafcc8a
PR
3762 }
3763
3764 mask = convert_state(mask);
3765 val = convert_state(val);
3766
778bcf2e 3767 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
dfafcc8a
PR
3768 conn_send_sr_reply(tconn, rv);
3769
82bc0194 3770 return 0;
dfafcc8a
PR
3771}
3772
4a76b161 3773static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3774{
4a76b161 3775 struct drbd_conf *mdev;
e658983a 3776 struct p_state *p = pi->data;
4ac4aada 3777 union drbd_state os, ns, peer_state;
b411b363 3778 enum drbd_disk_state real_peer_disk;
65d922c3 3779 enum chg_state_flags cs_flags;
b411b363
PR
3780 int rv;
3781
4a76b161
AG
3782 mdev = vnr_to_mdev(tconn, pi->vnr);
3783 if (!mdev)
3784 return config_unknown_volume(tconn, pi);
3785
b411b363
PR
3786 peer_state.i = be32_to_cpu(p->state);
3787
3788 real_peer_disk = peer_state.disk;
3789 if (peer_state.disk == D_NEGOTIATING) {
3790 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3791 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3792 }
3793
87eeee41 3794 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 3795 retry:
78bae59b 3796 os = ns = drbd_read_state(mdev);
87eeee41 3797 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 3798
b8853dbd
PR
3799 /* If some other part of the code (asender thread, timeout)
3800 * already decided to close the connection again,
3801 * we must not "re-establish" it here. */
3802 if (os.conn <= C_TEAR_DOWN)
3803 return false;
3804
9bcd2521
PR
3805 /* If this is the "end of sync" confirmation, usually the peer disk
3806 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3807 * set) resync started in PausedSyncT, or if the timing of pause-/
3808 * unpause-sync events has been "just right", the peer disk may
3809 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3810 */
3811 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3812 real_peer_disk == D_UP_TO_DATE &&
e9ef7bb6
LE
3813 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3814 /* If we are (becoming) SyncSource, but peer is still in sync
3815 * preparation, ignore its uptodate-ness to avoid flapping, it
3816 * will change to inconsistent once the peer reaches active
3817 * syncing states.
3818 * It may have changed syncer-paused flags, however, so we
3819 * cannot ignore this completely. */
3820 if (peer_state.conn > C_CONNECTED &&
3821 peer_state.conn < C_SYNC_SOURCE)
3822 real_peer_disk = D_INCONSISTENT;
3823
3824 /* if peer_state changes to connected at the same time,
3825 * it explicitly notifies us that it finished resync.
3826 * Maybe we should finish it up, too? */
3827 else if (os.conn >= C_SYNC_SOURCE &&
3828 peer_state.conn == C_CONNECTED) {
3829 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3830 drbd_resync_finished(mdev);
82bc0194 3831 return 0;
e9ef7bb6
LE
3832 }
3833 }
3834
3835 /* peer says his disk is inconsistent, while we think it is uptodate,
3836 * and this happens while the peer still thinks we have a sync going on,
3837 * but we think we are already done with the sync.
3838 * We ignore this to avoid flapping pdsk.
 3839 * This should not happen if the peer is a recent version of drbd. */
3840 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3841 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3842 real_peer_disk = D_UP_TO_DATE;
3843
4ac4aada
LE
3844 if (ns.conn == C_WF_REPORT_PARAMS)
3845 ns.conn = C_CONNECTED;
b411b363 3846
67531718
PR
3847 if (peer_state.conn == C_AHEAD)
3848 ns.conn = C_BEHIND;
3849
b411b363
PR
3850 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3851 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3852 int cr; /* consider resync */
3853
3854 /* if we established a new connection */
4ac4aada 3855 cr = (os.conn < C_CONNECTED);
b411b363
PR
3856 /* if we had an established connection
3857 * and one of the nodes newly attaches a disk */
4ac4aada 3858 cr |= (os.conn == C_CONNECTED &&
b411b363 3859 (peer_state.disk == D_NEGOTIATING ||
4ac4aada 3860 os.disk == D_NEGOTIATING));
b411b363
PR
3861 /* if we have both been inconsistent, and the peer has been
3862 * forced to be UpToDate with --overwrite-data */
3863 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3864 /* if we had been plain connected, and the admin requested to
3865 * start a sync by "invalidate" or "invalidate-remote" */
4ac4aada 3866 cr |= (os.conn == C_CONNECTED &&
b411b363
PR
3867 (peer_state.conn >= C_STARTING_SYNC_S &&
3868 peer_state.conn <= C_WF_BITMAP_T));
3869
3870 if (cr)
4ac4aada 3871 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
b411b363
PR
3872
3873 put_ldev(mdev);
4ac4aada
LE
3874 if (ns.conn == C_MASK) {
3875 ns.conn = C_CONNECTED;
b411b363 3876 if (mdev->state.disk == D_NEGOTIATING) {
82f59cc6 3877 drbd_force_state(mdev, NS(disk, D_FAILED));
b411b363
PR
3878 } else if (peer_state.disk == D_NEGOTIATING) {
3879 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3880 peer_state.disk = D_DISKLESS;
580b9767 3881 real_peer_disk = D_DISKLESS;
b411b363 3882 } else {
8169e41b 3883 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
82bc0194 3884 return -EIO;
4ac4aada 3885 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
38fa9988 3886 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3887 return -EIO;
b411b363
PR
3888 }
3889 }
3890 }
3891
87eeee41 3892 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 3893 if (os.i != drbd_read_state(mdev).i)
b411b363
PR
3894 goto retry;
3895 clear_bit(CONSIDER_RESYNC, &mdev->flags);
b411b363
PR
3896 ns.peer = peer_state.role;
3897 ns.pdsk = real_peer_disk;
3898 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
4ac4aada 3899 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
b411b363 3900 ns.disk = mdev->new_state_tmp.disk;
4ac4aada 3901 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
2aebfabb 3902 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
481c6f50 3903 test_bit(NEW_CUR_UUID, &mdev->flags)) {
8554df1c 3904 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
481c6f50 3905 for temporary network outages! */
87eeee41 3906 spin_unlock_irq(&mdev->tconn->req_lock);
481c6f50 3907 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
2f5cdd0b 3908 tl_clear(mdev->tconn);
481c6f50
PR
3909 drbd_uuid_new_current(mdev);
3910 clear_bit(NEW_CUR_UUID, &mdev->flags);
38fa9988 3911 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
82bc0194 3912 return -EIO;
481c6f50 3913 }
65d922c3 3914 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
78bae59b 3915 ns = drbd_read_state(mdev);
87eeee41 3916 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
3917
3918 if (rv < SS_SUCCESS) {
38fa9988 3919 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3920 return -EIO;
b411b363
PR
3921 }
3922
4ac4aada
LE
3923 if (os.conn > C_WF_REPORT_PARAMS) {
3924 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
b411b363
PR
3925 peer_state.disk != D_NEGOTIATING ) {
3926 /* we want resync, peer has not yet decided to sync... */
3927 /* Nowadays only used when forcing a node into primary role and
3928 setting its disk to UpToDate with that */
3929 drbd_send_uuids(mdev);
43de7c85 3930 drbd_send_current_state(mdev);
b411b363
PR
3931 }
3932 }
3933
08b165ba 3934 clear_bit(DISCARD_MY_DATA, &mdev->flags);
b411b363
PR
3935
3936 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3937
82bc0194 3938 return 0;
b411b363
PR
3939}
3940
4a76b161 3941static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3942{
4a76b161 3943 struct drbd_conf *mdev;
e658983a 3944 struct p_rs_uuid *p = pi->data;
4a76b161
AG
3945
3946 mdev = vnr_to_mdev(tconn, pi->vnr);
3947 if (!mdev)
3948 return -EIO;
b411b363
PR
3949
3950 wait_event(mdev->misc_wait,
3951 mdev->state.conn == C_WF_SYNC_UUID ||
c4752ef1 3952 mdev->state.conn == C_BEHIND ||
b411b363
PR
3953 mdev->state.conn < C_CONNECTED ||
3954 mdev->state.disk < D_NEGOTIATING);
3955
3956 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3957
b411b363
PR
3958 /* Here the _drbd_uuid_ functions are right, current should
3959 _not_ be rotated into the history */
3960 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3961 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3962 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3963
62b0da3a 3964 drbd_print_uuids(mdev, "updated sync uuid");
b411b363
PR
3965 drbd_start_resync(mdev, C_SYNC_TARGET);
3966
3967 put_ldev(mdev);
3968 } else
3969 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3970
82bc0194 3971 return 0;
b411b363
PR
3972}
3973
2c46407d
AG
3974/**
3975 * receive_bitmap_plain
3976 *
3977 * Return 0 when done, 1 when another iteration is needed, and a negative error
3978 * code upon failure.
3979 */
3980static int
50d0b1ad 3981receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
e658983a 3982 unsigned long *p, struct bm_xfer_ctx *c)
b411b363 3983{
50d0b1ad
AG
3984 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
3985 drbd_header_size(mdev->tconn);
e658983a 3986 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
50d0b1ad 3987 c->bm_words - c->word_offset);
e658983a 3988 unsigned int want = num_words * sizeof(*p);
2c46407d 3989 int err;
b411b363 3990
50d0b1ad
AG
3991 if (want != size) {
3992 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
2c46407d 3993 return -EIO;
b411b363
PR
3994 }
3995 if (want == 0)
2c46407d 3996 return 0;
e658983a 3997 err = drbd_recv_all(mdev->tconn, p, want);
82bc0194 3998 if (err)
2c46407d 3999 return err;
b411b363 4000
e658983a 4001 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
b411b363
PR
4002
4003 c->word_offset += num_words;
4004 c->bit_offset = c->word_offset * BITS_PER_LONG;
4005 if (c->bit_offset > c->bm_bits)
4006 c->bit_offset = c->bm_bits;
4007
2c46407d 4008 return 1;
b411b363
PR
4009}
4010
a02d1240
AG
4011static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4012{
4013 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4014}
4015
4016static int dcbp_get_start(struct p_compressed_bm *p)
4017{
4018 return (p->encoding & 0x80) != 0;
4019}
4020
4021static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4022{
4023 return (p->encoding >> 4) & 0x7;
4024}
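
Taken together, these three helpers decode the single 'encoding' byte of a compressed-bitmap packet; from the masks and shifts used above, the layout is:

  bit 7: start (value of the first run-length toggle)
  bits 6..4: pad_bits (0..7), passed to bitstream_init() for p->code
  bits 3..0: bitmap code (enum drbd_bitmap_code)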
4025
2c46407d
AG
4026/**
4027 * recv_bm_rle_bits
4028 *
4029 * Return 0 when done, 1 when another iteration is needed, and a negative error
4030 * code upon failure.
4031 */
4032static int
b411b363
PR
4033recv_bm_rle_bits(struct drbd_conf *mdev,
4034 struct p_compressed_bm *p,
c6d25cfe
PR
4035 struct bm_xfer_ctx *c,
4036 unsigned int len)
b411b363
PR
4037{
4038 struct bitstream bs;
4039 u64 look_ahead;
4040 u64 rl;
4041 u64 tmp;
4042 unsigned long s = c->bit_offset;
4043 unsigned long e;
a02d1240 4044 int toggle = dcbp_get_start(p);
b411b363
PR
4045 int have;
4046 int bits;
4047
a02d1240 4048 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
b411b363
PR
4049
4050 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4051 if (bits < 0)
2c46407d 4052 return -EIO;
b411b363
PR
4053
4054 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4055 bits = vli_decode_bits(&rl, look_ahead);
4056 if (bits <= 0)
2c46407d 4057 return -EIO;
b411b363
PR
4058
4059 if (toggle) {
4060 e = s + rl -1;
4061 if (e >= c->bm_bits) {
4062 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
2c46407d 4063 return -EIO;
b411b363
PR
4064 }
4065 _drbd_bm_set_bits(mdev, s, e);
4066 }
4067
4068 if (have < bits) {
4069 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4070 have, bits, look_ahead,
4071 (unsigned int)(bs.cur.b - p->code),
4072 (unsigned int)bs.buf_len);
2c46407d 4073 return -EIO;
b411b363
PR
4074 }
4075 look_ahead >>= bits;
4076 have -= bits;
4077
4078 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4079 if (bits < 0)
2c46407d 4080 return -EIO;
b411b363
PR
4081 look_ahead |= tmp << have;
4082 have += bits;
4083 }
4084
4085 c->bit_offset = s;
4086 bm_xfer_ctx_bit_to_word_offset(c);
4087
2c46407d 4088 return (s != c->bm_bits);
b411b363
PR
4089}
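
To illustrate the decoding loop: the payload is a sequence of VLI-encoded run lengths describing alternating runs of clear and set bits, and dcbp_get_start() says whether the first run consists of set bits. With start = 0 and decoded run lengths 5, 3 and 7, the first 5 bits from c->bit_offset stay clear, the next 3 are set via _drbd_bm_set_bits(), the following 7 stay clear again, and so on, with toggle flipping after every run.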
4090
2c46407d
AG
4091/**
4092 * decode_bitmap_c
4093 *
4094 * Return 0 when done, 1 when another iteration is needed, and a negative error
4095 * code upon failure.
4096 */
4097static int
b411b363
PR
4098decode_bitmap_c(struct drbd_conf *mdev,
4099 struct p_compressed_bm *p,
c6d25cfe
PR
4100 struct bm_xfer_ctx *c,
4101 unsigned int len)
b411b363 4102{
a02d1240 4103 if (dcbp_get_code(p) == RLE_VLI_Bits)
e658983a 4104 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
b411b363
PR
4105
4106 /* other variants had been implemented for evaluation,
4107 * but have been dropped as this one turned out to be "best"
4108 * during all our tests. */
4109
4110 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
38fa9988 4111 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
2c46407d 4112 return -EIO;
b411b363
PR
4113}
4114
4115void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4116 const char *direction, struct bm_xfer_ctx *c)
4117{
4118 /* what would it take to transfer it "plaintext" */
50d0b1ad
AG
4119 unsigned int header_size = drbd_header_size(mdev->tconn);
4120 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4121 unsigned int plain =
4122 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4123 c->bm_words * sizeof(unsigned long);
4124 unsigned int total = c->bytes[0] + c->bytes[1];
4125 unsigned int r;
b411b363
PR
4126
4127 /* total can not be zero. but just in case: */
4128 if (total == 0)
4129 return;
4130
4131 /* don't report if not compressed */
4132 if (total >= plain)
4133 return;
4134
4135 /* total < plain. check for overflow, still */
4136 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4137 : (1000 * total / plain);
4138
4139 if (r > 1000)
4140 r = 1000;
4141
4142 r = 1000 - r;
4143 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4144 "total %u; compression: %u.%u%%\n",
4145 direction,
4146 c->bytes[1], c->packets[1],
4147 c->bytes[0], c->packets[0],
4148 total, r/10, r % 10);
4149}
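
Worked example of the per-mille arithmetic above: suppose the plaintext transfer would have taken plain = 100000 bytes and the actual transfer used total = 12345 bytes. Then r = 1000 * 12345 / 100000 = 123, r = 1000 - 123 = 877, and the message reports "compression: 87.7%".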
4150
 4151/* Since we are processing the bitfield from lower addresses to higher,
 4152 it does not matter whether we process it in 32 bit chunks or 64 bit
 4153 chunks as long as it is little endian. (Understand it as a byte stream,
 4154 beginning with the lowest byte...) If we used big endian
 4155 we would need to process it from the highest address to the lowest,
 4156 in order to be agnostic to the 32 vs 64 bits issue.
 4157
 4158 returns 0 on success, and a negative error code on failure. */
4a76b161 4159static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4160{
4a76b161 4161 struct drbd_conf *mdev;
b411b363 4162 struct bm_xfer_ctx c;
2c46407d 4163 int err;
4a76b161
AG
4164
4165 mdev = vnr_to_mdev(tconn, pi->vnr);
4166 if (!mdev)
4167 return -EIO;
b411b363 4168
20ceb2b2
LE
4169 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4170 /* you are supposed to send additional out-of-sync information
4171 * if you actually set bits during this phase */
b411b363 4172
b411b363
PR
4173 c = (struct bm_xfer_ctx) {
4174 .bm_bits = drbd_bm_bits(mdev),
4175 .bm_words = drbd_bm_words(mdev),
4176 };
4177
2c46407d 4178 for(;;) {
e658983a
AG
4179 if (pi->cmd == P_BITMAP)
4180 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4181 else if (pi->cmd == P_COMPRESSED_BITMAP) {
b411b363
PR
4182 /* MAYBE: sanity check that we speak proto >= 90,
4183 * and the feature is enabled! */
e658983a 4184 struct p_compressed_bm *p = pi->data;
b411b363 4185
50d0b1ad 4186 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
b411b363 4187 dev_err(DEV, "ReportCBitmap packet too large\n");
82bc0194 4188 err = -EIO;
b411b363
PR
4189 goto out;
4190 }
e658983a 4191 if (pi->size <= sizeof(*p)) {
e2857216 4192 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
82bc0194 4193 err = -EIO;
78fcbdae 4194 goto out;
b411b363 4195 }
e658983a
AG
4196 err = drbd_recv_all(mdev->tconn, p, pi->size);
4197 if (err)
4198 goto out;
e2857216 4199 err = decode_bitmap_c(mdev, p, &c, pi->size);
b411b363 4200 } else {
e2857216 4201 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
82bc0194 4202 err = -EIO;
b411b363
PR
4203 goto out;
4204 }
4205
e2857216 4206 c.packets[pi->cmd == P_BITMAP]++;
50d0b1ad 4207 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
b411b363 4208
2c46407d
AG
4209 if (err <= 0) {
4210 if (err < 0)
4211 goto out;
b411b363 4212 break;
2c46407d 4213 }
e2857216 4214 err = drbd_recv_header(mdev->tconn, pi);
82bc0194 4215 if (err)
b411b363 4216 goto out;
2c46407d 4217 }
b411b363
PR
4218
4219 INFO_bm_xfer_stats(mdev, "receive", &c);
4220
4221 if (mdev->state.conn == C_WF_BITMAP_T) {
de1f8e4a
AG
4222 enum drbd_state_rv rv;
4223
82bc0194
AG
4224 err = drbd_send_bitmap(mdev);
4225 if (err)
b411b363
PR
4226 goto out;
4227 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
de1f8e4a
AG
4228 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4229 D_ASSERT(rv == SS_SUCCESS);
b411b363
PR
4230 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4231 /* admin may have requested C_DISCONNECTING,
4232 * other threads may have noticed network errors */
4233 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4234 drbd_conn_str(mdev->state.conn));
4235 }
82bc0194 4236 err = 0;
b411b363 4237
b411b363 4238 out:
20ceb2b2 4239 drbd_bm_unlock(mdev);
82bc0194 4240 if (!err && mdev->state.conn == C_WF_BITMAP_S)
b411b363 4241 drbd_start_resync(mdev, C_SYNC_SOURCE);
82bc0194 4242 return err;
b411b363
PR
4243}
4244
4a76b161 4245static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4246{
4a76b161 4247 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
e2857216 4248 pi->cmd, pi->size);
2de876ef 4249
4a76b161 4250 return ignore_remaining_packet(tconn, pi);
2de876ef
PR
4251}
4252
4a76b161 4253static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
0ced55a3 4254{
e7f52dfb
LE
4255 /* Make sure we've acked all the TCP data associated
4256 * with the data requests being unplugged */
4a76b161 4257 drbd_tcp_quickack(tconn->data.socket);
0ced55a3 4258
82bc0194 4259 return 0;
0ced55a3
PR
4260}
4261
4a76b161 4262static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
73a01a18 4263{
4a76b161 4264 struct drbd_conf *mdev;
e658983a 4265 struct p_block_desc *p = pi->data;
4a76b161
AG
4266
4267 mdev = vnr_to_mdev(tconn, pi->vnr);
4268 if (!mdev)
4269 return -EIO;
73a01a18 4270
f735e363
LE
4271 switch (mdev->state.conn) {
4272 case C_WF_SYNC_UUID:
4273 case C_WF_BITMAP_T:
4274 case C_BEHIND:
4275 break;
4276 default:
4277 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4278 drbd_conn_str(mdev->state.conn));
4279 }
4280
73a01a18
PR
4281 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4282
82bc0194 4283 return 0;
73a01a18
PR
4284}
4285
02918be2
PR
4286struct data_cmd {
4287 int expect_payload;
4288 size_t pkt_size;
4a76b161 4289 int (*fn)(struct drbd_tconn *, struct packet_info *);
02918be2
PR
4290};
4291
4292static struct data_cmd drbd_cmd_handler[] = {
4a76b161
AG
4293 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4294 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4295 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4296 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4297 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4298 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4299 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
4a76b161
AG
4300 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4301 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4302 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4303 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
4a76b161
AG
4304 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4305 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4306 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4307 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4308 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4309 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4310 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4311 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4312 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4313 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
4314 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4315 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4316 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
b411b363
PR
4317};
4318
eefc2f7d 4319static void drbdd(struct drbd_tconn *tconn)
b411b363 4320{
77351055 4321 struct packet_info pi;
02918be2 4322 size_t shs; /* sub header size */
82bc0194 4323 int err;
b411b363 4324
eefc2f7d 4325 while (get_t_state(&tconn->receiver) == RUNNING) {
deebe195
AG
4326 struct data_cmd *cmd;
4327
eefc2f7d 4328 drbd_thread_current_set_cpu(&tconn->receiver);
69bc7bc3 4329 if (drbd_recv_header(tconn, &pi))
02918be2 4330 goto err_out;
b411b363 4331
deebe195 4332 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4333 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
2fcb8f30
AG
4334 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4335 cmdname(pi.cmd), pi.cmd);
02918be2 4336 goto err_out;
0b33a916 4337 }
b411b363 4338
e658983a
AG
4339 shs = cmd->pkt_size;
4340 if (pi.size > shs && !cmd->expect_payload) {
2fcb8f30
AG
4341 conn_err(tconn, "No payload expected %s l:%d\n",
4342 cmdname(pi.cmd), pi.size);
02918be2 4343 goto err_out;
b411b363 4344 }
b411b363 4345
c13f7e1a 4346 if (shs) {
e658983a 4347 err = drbd_recv_all_warn(tconn, pi.data, shs);
a5c31904 4348 if (err)
c13f7e1a 4349 goto err_out;
e2857216 4350 pi.size -= shs;
c13f7e1a
LE
4351 }
4352
4a76b161
AG
4353 err = cmd->fn(tconn, &pi);
4354 if (err) {
9f5bdc33
AG
4355 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4356 cmdname(pi.cmd), err, pi.size);
02918be2 4357 goto err_out;
b411b363
PR
4358 }
4359 }
82bc0194 4360 return;
b411b363 4361
82bc0194
AG
4362 err_out:
4363 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4364}
4365
0e29d163 4366void conn_flush_workqueue(struct drbd_tconn *tconn)
b411b363
PR
4367{
4368 struct drbd_wq_barrier barr;
4369
4370 barr.w.cb = w_prev_work_done;
0e29d163 4371 barr.w.tconn = tconn;
b411b363 4372 init_completion(&barr.done);
0e29d163 4373 drbd_queue_work(&tconn->data.work, &barr.w);
b411b363
PR
4374 wait_for_completion(&barr.done);
4375}
4376
81fa2e67 4377static void conn_disconnect(struct drbd_tconn *tconn)
b411b363 4378{
c141ebda 4379 struct drbd_conf *mdev;
bbeb641c 4380 enum drbd_conns oc;
376694a0 4381 int vnr;
b411b363 4382
bbeb641c 4383 if (tconn->cstate == C_STANDALONE)
b411b363 4384 return;
b411b363 4385
b8853dbd
PR
4386 /* We are about to start the cleanup after connection loss.
4387 * Make sure drbd_make_request knows about that.
4388 * Usually we should be in some network failure state already,
4389 * but just in case we are not, we fix it up here.
4390 */
4391 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
4392
b411b363 4393 /* asender does not clean up anything. it must not interfere, either */
360cc740
PR
4394 drbd_thread_stop(&tconn->asender);
4395 drbd_free_sock(tconn);
4396
c141ebda
PR
4397 rcu_read_lock();
4398 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4399 kref_get(&mdev->kref);
4400 rcu_read_unlock();
4401 drbd_disconnected(mdev);
4402 kref_put(&mdev->kref, &drbd_minor_destroy);
4403 rcu_read_lock();
4404 }
4405 rcu_read_unlock();
4406
12038a3a
PR
4407 if (!list_empty(&tconn->current_epoch->list))
4408 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4409 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4410 atomic_set(&tconn->current_epoch->epoch_size, 0);
4411
360cc740
PR
4412 conn_info(tconn, "Connection closed\n");
4413
cb703454
PR
4414 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4415 conn_try_outdate_peer_async(tconn);
4416
360cc740 4417 spin_lock_irq(&tconn->req_lock);
bbeb641c
PR
4418 oc = tconn->cstate;
4419 if (oc >= C_UNCONNECTED)
376694a0 4420 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4421
360cc740
PR
4422 spin_unlock_irq(&tconn->req_lock);
4423
f3dfa40a 4424 if (oc == C_DISCONNECTING)
d9cc6e23 4425 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4426}
4427
c141ebda 4428static int drbd_disconnected(struct drbd_conf *mdev)
360cc740 4429{
360cc740 4430 unsigned int i;
b411b363 4431
85719573 4432 /* wait for current activity to cease. */
87eeee41 4433 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
4434 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4435 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4436 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
87eeee41 4437 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
4438
4439 /* We do not have data structures that would allow us to
4440 * get the rs_pending_cnt down to 0 again.
4441 * * On C_SYNC_TARGET we do not have any data structures describing
4442 * the pending RSDataRequest's we have sent.
4443 * * On C_SYNC_SOURCE there is no data structure that tracks
4444 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4445 * And no, it is not the sum of the reference counts in the
4446 * resync_LRU. The resync_LRU tracks the whole operation including
4447 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4448 * on the fly. */
4449 drbd_rs_cancel_all(mdev);
4450 mdev->rs_total = 0;
4451 mdev->rs_failed = 0;
4452 atomic_set(&mdev->rs_pending_cnt, 0);
4453 wake_up(&mdev->misc_wait);
4454
b411b363 4455 del_timer_sync(&mdev->resync_timer);
b411b363
PR
4456 resync_timer_fn((unsigned long)mdev);
4457
b411b363
PR
4458 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4459 * w_make_resync_request etc. which may still be on the worker queue
4460 * to be "canceled" */
a21e9298 4461 drbd_flush_workqueue(mdev);
b411b363 4462
a990be46 4463 drbd_finish_peer_reqs(mdev);
b411b363 4464
d10b4ea3
PR
4465 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4466 might have queued work again. The one before drbd_finish_peer_reqs() is
 4467 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4468 drbd_flush_workqueue(mdev);
4469
b411b363
PR
4470 kfree(mdev->p_uuid);
4471 mdev->p_uuid = NULL;
4472
2aebfabb 4473 if (!drbd_suspended(mdev))
2f5cdd0b 4474 tl_clear(mdev->tconn);
b411b363 4475
b411b363
PR
4476 drbd_md_sync(mdev);
4477
20ceb2b2
LE
4478 /* serialize with bitmap writeout triggered by the state change,
4479 * if any. */
4480 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4481
b411b363
PR
4482 /* tcp_close and release of sendpage pages can be deferred. I don't
4483 * want to use SO_LINGER, because apparently it can be deferred for
4484 * more than 20 seconds (longest time I checked).
4485 *
4486 * Actually we don't care for exactly when the network stack does its
4487 * put_page(), but release our reference on these pages right here.
4488 */
7721f567 4489 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
b411b363
PR
4490 if (i)
4491 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
435f0740
LE
4492 i = atomic_read(&mdev->pp_in_use_by_net);
4493 if (i)
4494 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
b411b363
PR
4495 i = atomic_read(&mdev->pp_in_use);
4496 if (i)
45bb912b 4497 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
b411b363
PR
4498
4499 D_ASSERT(list_empty(&mdev->read_ee));
4500 D_ASSERT(list_empty(&mdev->active_ee));
4501 D_ASSERT(list_empty(&mdev->sync_ee));
4502 D_ASSERT(list_empty(&mdev->done_ee));
4503
360cc740 4504 return 0;
b411b363
PR
4505}
4506
4507/*
4508 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4509 * we can agree on is stored in agreed_pro_version.
4510 *
4511 * feature flags and the reserved array should be enough room for future
4512 * enhancements of the handshake protocol, and possible plugins...
4513 *
4514 * for now, they are expected to be zero, but ignored.
4515 */
6038178e 4516static int drbd_send_features(struct drbd_tconn *tconn)
b411b363 4517{
9f5bdc33
AG
4518 struct drbd_socket *sock;
4519 struct p_connection_features *p;
b411b363 4520
9f5bdc33
AG
4521 sock = &tconn->data;
4522 p = conn_prepare_command(tconn, sock);
4523 if (!p)
e8d17b01 4524 return -EIO;
b411b363
PR
4525 memset(p, 0, sizeof(*p));
4526 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4527 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
9f5bdc33 4528 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4529}
4530
4531/*
4532 * return values:
4533 * 1 yes, we have a valid connection
4534 * 0 oops, did not work out, please try again
4535 * -1 peer talks different language,
4536 * no point in trying again, please go standalone.
4537 */
6038178e 4538static int drbd_do_features(struct drbd_tconn *tconn)
b411b363 4539{
65d11ed6 4540 /* ASSERT current == tconn->receiver ... */
e658983a
AG
4541 struct p_connection_features *p;
4542 const int expect = sizeof(struct p_connection_features);
77351055 4543 struct packet_info pi;
a5c31904 4544 int err;
b411b363 4545
6038178e 4546 err = drbd_send_features(tconn);
e8d17b01 4547 if (err)
b411b363
PR
4548 return 0;
4549
69bc7bc3
AG
4550 err = drbd_recv_header(tconn, &pi);
4551 if (err)
b411b363
PR
4552 return 0;
4553
6038178e
AG
4554 if (pi.cmd != P_CONNECTION_FEATURES) {
4555 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4556 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4557 return -1;
4558 }
4559
77351055 4560 if (pi.size != expect) {
6038178e 4561 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4562 expect, pi.size);
b411b363
PR
4563 return -1;
4564 }
4565
e658983a
AG
4566 p = pi.data;
4567 err = drbd_recv_all_warn(tconn, p, expect);
a5c31904 4568 if (err)
b411b363 4569 return 0;
b411b363 4570
b411b363
PR
4571 p->protocol_min = be32_to_cpu(p->protocol_min);
4572 p->protocol_max = be32_to_cpu(p->protocol_max);
4573 if (p->protocol_max == 0)
4574 p->protocol_max = p->protocol_min;
4575
4576 if (PRO_VERSION_MAX < p->protocol_min ||
4577 PRO_VERSION_MIN > p->protocol_max)
4578 goto incompat;
4579
65d11ed6 4580 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
b411b363 4581
65d11ed6
PR
4582 conn_info(tconn, "Handshake successful: "
4583 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
b411b363
PR
4584
4585 return 1;
4586
4587 incompat:
65d11ed6 4588 conn_err(tconn, "incompatible DRBD dialects: "
b411b363
PR
4589 "I support %d-%d, peer supports %d-%d\n",
4590 PRO_VERSION_MIN, PRO_VERSION_MAX,
4591 p->protocol_min, p->protocol_max);
4592 return -1;
4593}
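
The negotiation boils down to agreed_pro_version = min(PRO_VERSION_MAX, peer's protocol_max), provided the two ranges overlap at all. With illustrative numbers (not the actual PRO_VERSION_* values): if we supported 86..100 and the peer advertised 88..96, both sides would agree on 96; a peer advertising only 80..85 would not overlap, and drbd_do_features() would return -1, telling the caller to go standalone.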
4594
4595#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
13e6037d 4596static int drbd_do_auth(struct drbd_tconn *tconn)
b411b363
PR
4597{
 4598 conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
 4599 conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4600 return -1;
b411b363
PR
4601}
4602#else
4603#define CHALLENGE_LEN 64
b10d96cb
JT
4604
4605/* Return value:
4606 1 - auth succeeded,
4607 0 - failed, try again (network error),
4608 -1 - auth failed, don't try again.
4609*/
4610
13e6037d 4611static int drbd_do_auth(struct drbd_tconn *tconn)
b411b363 4612{
9f5bdc33 4613 struct drbd_socket *sock;
b411b363
PR
4614 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4615 struct scatterlist sg;
4616 char *response = NULL;
4617 char *right_response = NULL;
4618 char *peers_ch = NULL;
44ed167d
PR
4619 unsigned int key_len;
4620 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4621 unsigned int resp_size;
4622 struct hash_desc desc;
77351055 4623 struct packet_info pi;
44ed167d 4624 struct net_conf *nc;
69bc7bc3 4625 int err, rv;
b411b363 4626
9f5bdc33
AG
4627 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4628
44ed167d
PR
4629 rcu_read_lock();
4630 nc = rcu_dereference(tconn->net_conf);
4631 key_len = strlen(nc->shared_secret);
4632 memcpy(secret, nc->shared_secret, key_len);
4633 rcu_read_unlock();
4634
13e6037d 4635 desc.tfm = tconn->cram_hmac_tfm;
b411b363
PR
4636 desc.flags = 0;
4637
44ed167d 4638 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4639 if (rv) {
13e6037d 4640 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4641 rv = -1;
b411b363
PR
4642 goto fail;
4643 }
4644
4645 get_random_bytes(my_challenge, CHALLENGE_LEN);
4646
9f5bdc33
AG
4647 sock = &tconn->data;
4648 if (!conn_prepare_command(tconn, sock)) {
4649 rv = 0;
4650 goto fail;
4651 }
e658983a 4652 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4653 my_challenge, CHALLENGE_LEN);
b411b363
PR
4654 if (!rv)
4655 goto fail;
4656
69bc7bc3
AG
4657 err = drbd_recv_header(tconn, &pi);
4658 if (err) {
4659 rv = 0;
b411b363 4660 goto fail;
69bc7bc3 4661 }
b411b363 4662
77351055 4663 if (pi.cmd != P_AUTH_CHALLENGE) {
13e6037d 4664 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4665 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4666 rv = 0;
4667 goto fail;
4668 }
4669
77351055 4670 if (pi.size > CHALLENGE_LEN * 2) {
13e6037d 4671 conn_err(tconn, "expected AuthChallenge payload too big.\n");
b10d96cb 4672 rv = -1;
b411b363
PR
4673 goto fail;
4674 }
4675
77351055 4676 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4677 if (peers_ch == NULL) {
13e6037d 4678 conn_err(tconn, "kmalloc of peers_ch failed\n");
b10d96cb 4679 rv = -1;
b411b363
PR
4680 goto fail;
4681 }
4682
a5c31904
AG
4683 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4684 if (err) {
b411b363
PR
4685 rv = 0;
4686 goto fail;
4687 }
4688
13e6037d 4689 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
b411b363
PR
4690 response = kmalloc(resp_size, GFP_NOIO);
4691 if (response == NULL) {
13e6037d 4692 conn_err(tconn, "kmalloc of response failed\n");
b10d96cb 4693 rv = -1;
b411b363
PR
4694 goto fail;
4695 }
4696
4697 sg_init_table(&sg, 1);
77351055 4698 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4699
4700 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4701 if (rv) {
13e6037d 4702 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4703 rv = -1;
b411b363
PR
4704 goto fail;
4705 }
4706
9f5bdc33
AG
4707 if (!conn_prepare_command(tconn, sock)) {
4708 rv = 0;
4709 goto fail;
4710 }
e658983a 4711 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4712 response, resp_size);
b411b363
PR
4713 if (!rv)
4714 goto fail;
4715
69bc7bc3
AG
4716 err = drbd_recv_header(tconn, &pi);
4717 if (err) {
4718 rv = 0;
b411b363 4719 goto fail;
69bc7bc3 4720 }
b411b363 4721
77351055 4722 if (pi.cmd != P_AUTH_RESPONSE) {
13e6037d 4723 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4724 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4725 rv = 0;
4726 goto fail;
4727 }
4728
77351055 4729 if (pi.size != resp_size) {
13e6037d 4730 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
b411b363
PR
4731 rv = 0;
4732 goto fail;
4733 }
4734
a5c31904
AG
 4735 err = drbd_recv_all_warn(tconn, response, resp_size);
4736 if (err) {
b411b363
PR
4737 rv = 0;
4738 goto fail;
4739 }
4740
4741 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4742 if (right_response == NULL) {
13e6037d 4743 conn_err(tconn, "kmalloc of right_response failed\n");
b10d96cb 4744 rv = -1;
b411b363
PR
4745 goto fail;
4746 }
4747
4748 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4749
4750 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4751 if (rv) {
13e6037d 4752 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4753 rv = -1;
b411b363
PR
4754 goto fail;
4755 }
4756
4757 rv = !memcmp(response, right_response, resp_size);
4758
4759 if (rv)
44ed167d
PR
4760 conn_info(tconn, "Peer authenticated using %d bytes HMAC\n",
4761 resp_size);
b10d96cb
JT
4762 else
4763 rv = -1;
b411b363
PR
4764
4765 fail:
4766 kfree(peers_ch);
4767 kfree(response);
4768 kfree(right_response);
4769
4770 return rv;
4771}
4772#endif
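
In short, drbd_do_auth() implements a symmetric CRAM-HMAC exchange over the shared secret from net_conf: each side sends a random 64-byte challenge in P_AUTH_CHALLENGE, answers the peer's challenge with HMAC(secret, challenge) computed via the cram-hmac tfm in P_AUTH_RESPONSE, and compares the peer's response against the HMAC it computed over its own challenge (right_response). A match returns 1 (authenticated), a mismatch returns -1 (do not retry), and network errors return 0 (retry).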
4773
4774int drbdd_init(struct drbd_thread *thi)
4775{
392c8801 4776 struct drbd_tconn *tconn = thi->tconn;
b411b363
PR
4777 int h;
4778
4d641dd7 4779 conn_info(tconn, "receiver (re)started\n");
b411b363
PR
4780
4781 do {
81fa2e67 4782 h = conn_connect(tconn);
b411b363 4783 if (h == 0) {
81fa2e67 4784 conn_disconnect(tconn);
20ee6390 4785 schedule_timeout_interruptible(HZ);
b411b363
PR
4786 }
4787 if (h == -1) {
4d641dd7 4788 conn_warn(tconn, "Discarding network configuration.\n");
bbeb641c 4789 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4790 }
4791 } while (h == 0);
4792
91fd4dad
PR
4793 if (h > 0)
4794 drbdd(tconn);
b411b363 4795
81fa2e67 4796 conn_disconnect(tconn);
b411b363 4797
4d641dd7 4798 conn_info(tconn, "receiver terminated\n");
b411b363
PR
4799 return 0;
4800}
4801
4802/* ********* acknowledge sender ******** */
4803
e05e1e59 4804static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
e4f78ede 4805{
e658983a 4806 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
4807 int retcode = be32_to_cpu(p->retcode);
4808
4809 if (retcode >= SS_SUCCESS) {
4810 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4811 } else {
4812 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4813 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4814 drbd_set_st_err_str(retcode), retcode);
4815 }
4816 wake_up(&tconn->ping_wait);
4817
2735a594 4818 return 0;
e4f78ede
PR
4819}
4820
1952e916 4821static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4822{
1952e916 4823 struct drbd_conf *mdev;
e658983a 4824 struct p_req_state_reply *p = pi->data;
b411b363
PR
4825 int retcode = be32_to_cpu(p->retcode);
4826
1952e916
AG
4827 mdev = vnr_to_mdev(tconn, pi->vnr);
4828 if (!mdev)
2735a594 4829 return -EIO;
1952e916 4830
4d0fc3fd
PR
4831 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4832 D_ASSERT(tconn->agreed_pro_version < 100);
4833 return got_conn_RqSReply(tconn, pi);
4834 }
4835
e4f78ede
PR
4836 if (retcode >= SS_SUCCESS) {
4837 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4838 } else {
4839 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4840 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4841 drbd_set_st_err_str(retcode), retcode);
b411b363 4842 }
e4f78ede
PR
4843 wake_up(&mdev->state_wait);
4844
2735a594 4845 return 0;
b411b363
PR
4846}
4847
e05e1e59 4848static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4849{
2735a594 4850 return drbd_send_ping_ack(tconn);
b411b363
PR
4851
4852}
4853
e05e1e59 4854static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363
PR
4855{
4856 /* restore idle timeout */
2a67d8b9
PR
4857 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4858 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4859 wake_up(&tconn->ping_wait);
b411b363 4860
2735a594 4861 return 0;
b411b363
PR
4862}
4863
1952e916 4864static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4865{
1952e916 4866 struct drbd_conf *mdev;
e658983a 4867 struct p_block_ack *p = pi->data;
b411b363
PR
4868 sector_t sector = be64_to_cpu(p->sector);
4869 int blksize = be32_to_cpu(p->blksize);
4870
1952e916
AG
4871 mdev = vnr_to_mdev(tconn, pi->vnr);
4872 if (!mdev)
2735a594 4873 return -EIO;
1952e916 4874
31890f4a 4875 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
b411b363
PR
4876
4877 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4878
1d53f09e
LE
4879 if (get_ldev(mdev)) {
4880 drbd_rs_complete_io(mdev, sector);
4881 drbd_set_in_sync(mdev, sector, blksize);
4882 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4883 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4884 put_ldev(mdev);
4885 }
b411b363 4886 dec_rs_pending(mdev);
778f271d 4887 atomic_add(blksize >> 9, &mdev->rs_sect_in);
b411b363 4888
2735a594 4889 return 0;
b411b363
PR
4890}
4891
bc9c5c41
AG
4892static int
4893validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4894 struct rb_root *root, const char *func,
4895 enum drbd_req_event what, bool missing_ok)
b411b363
PR
4896{
4897 struct drbd_request *req;
4898 struct bio_and_error m;
4899
87eeee41 4900 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 4901 req = find_request(mdev, root, id, sector, missing_ok, func);
b411b363 4902 if (unlikely(!req)) {
87eeee41 4903 spin_unlock_irq(&mdev->tconn->req_lock);
85997675 4904 return -EIO;
b411b363
PR
4905 }
4906 __req_mod(req, what, &m);
87eeee41 4907 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
4908
4909 if (m.bio)
4910 complete_master_bio(mdev, &m);
85997675 4911 return 0;
b411b363
PR
4912}
4913
1952e916 4914static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4915{
1952e916 4916 struct drbd_conf *mdev;
e658983a 4917 struct p_block_ack *p = pi->data;
b411b363
PR
4918 sector_t sector = be64_to_cpu(p->sector);
4919 int blksize = be32_to_cpu(p->blksize);
4920 enum drbd_req_event what;
4921
1952e916
AG
4922 mdev = vnr_to_mdev(tconn, pi->vnr);
4923 if (!mdev)
2735a594 4924 return -EIO;
1952e916 4925
b411b363
PR
4926 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4927
579b57ed 4928 if (p->block_id == ID_SYNCER) {
b411b363
PR
4929 drbd_set_in_sync(mdev, sector, blksize);
4930 dec_rs_pending(mdev);
2735a594 4931 return 0;
b411b363 4932 }
e05e1e59 4933 switch (pi->cmd) {
b411b363 4934 case P_RS_WRITE_ACK:
8554df1c 4935 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
4936 break;
4937 case P_WRITE_ACK:
8554df1c 4938 what = WRITE_ACKED_BY_PEER;
b411b363
PR
4939 break;
4940 case P_RECV_ACK:
8554df1c 4941 what = RECV_ACKED_BY_PEER;
b411b363 4942 break;
7be8da07 4943 case P_DISCARD_WRITE:
7be8da07
AG
4944 what = DISCARD_WRITE;
4945 break;
4946 case P_RETRY_WRITE:
7be8da07 4947 what = POSTPONE_WRITE;
b411b363
PR
4948 break;
4949 default:
2735a594 4950 BUG();
b411b363
PR
4951 }
4952
2735a594
AG
4953 return validate_req_change_req_state(mdev, p->block_id, sector,
4954 &mdev->write_requests, __func__,
4955 what, false);
b411b363
PR
4956}
4957
1952e916 4958static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4959{
1952e916 4960 struct drbd_conf *mdev;
e658983a 4961 struct p_block_ack *p = pi->data;
b411b363 4962 sector_t sector = be64_to_cpu(p->sector);
2deb8336 4963 int size = be32_to_cpu(p->blksize);
85997675 4964 int err;
b411b363 4965
1952e916
AG
4966 mdev = vnr_to_mdev(tconn, pi->vnr);
4967 if (!mdev)
2735a594 4968 return -EIO;
1952e916 4969
b411b363
PR
4970 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4971
579b57ed 4972 if (p->block_id == ID_SYNCER) {
b411b363
PR
4973 dec_rs_pending(mdev);
4974 drbd_rs_failed_io(mdev, sector, size);
2735a594 4975 return 0;
b411b363 4976 }
2deb8336 4977
85997675
AG
4978 err = validate_req_change_req_state(mdev, p->block_id, sector,
4979 &mdev->write_requests, __func__,
303d1448 4980 NEG_ACKED, true);
85997675 4981 if (err) {
c3afd8f5
AG
4982 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4983 The master bio might already be completed, therefore the
4984 request is no longer in the collision hash. */
4985 /* In Protocol B we might already have got a P_RECV_ACK
4986 but then get a P_NEG_ACK afterwards. */
c3afd8f5 4987 drbd_set_out_of_sync(mdev, sector, size);
2deb8336 4988 }
2735a594 4989 return 0;
b411b363
PR
4990}
4991
1952e916 4992static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4993{
1952e916 4994 struct drbd_conf *mdev;
e658983a 4995 struct p_block_ack *p = pi->data;
b411b363
PR
4996 sector_t sector = be64_to_cpu(p->sector);
4997
1952e916
AG
4998 mdev = vnr_to_mdev(tconn, pi->vnr);
4999 if (!mdev)
2735a594 5000 return -EIO;
1952e916 5001
b411b363 5002 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
7be8da07 5003
380207d0 5004 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5005 (unsigned long long)sector, be32_to_cpu(p->blksize));
5006
2735a594
AG
5007 return validate_req_change_req_state(mdev, p->block_id, sector,
5008 &mdev->read_requests, __func__,
5009 NEG_ACKED, false);
b411b363
PR
5010}
5011
1952e916 5012static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5013{
1952e916 5014 struct drbd_conf *mdev;
b411b363
PR
5015 sector_t sector;
5016 int size;
e658983a 5017 struct p_block_ack *p = pi->data;
1952e916
AG
5018
5019 mdev = vnr_to_mdev(tconn, pi->vnr);
5020 if (!mdev)
2735a594 5021 return -EIO;
b411b363
PR
5022
5023 sector = be64_to_cpu(p->sector);
5024 size = be32_to_cpu(p->blksize);
b411b363
PR
5025
5026 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5027
5028 dec_rs_pending(mdev);
5029
5030 if (get_ldev_if_state(mdev, D_FAILED)) {
5031 drbd_rs_complete_io(mdev, sector);
e05e1e59 5032 switch (pi->cmd) {
d612d309
PR
5033 case P_NEG_RS_DREPLY:
5034 drbd_rs_failed_io(mdev, sector, size);
5035 case P_RS_CANCEL:
5036 break;
5037 default:
2735a594 5038 BUG();
d612d309 5039 }
b411b363
PR
5040 put_ldev(mdev);
5041 }
5042
2735a594 5043 return 0;
b411b363
PR
5044}
5045
1952e916 5046static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5047{
1952e916 5048 struct drbd_conf *mdev;
e658983a 5049 struct p_barrier_ack *p = pi->data;
1952e916
AG
5050
5051 mdev = vnr_to_mdev(tconn, pi->vnr);
5052 if (!mdev)
2735a594 5053 return -EIO;
b411b363 5054
2f5cdd0b 5055 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
b411b363 5056
c4752ef1
PR
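	/* Once the peer has acked the barrier and no application writes are in
	 * flight, arm start_resync_timer (about one second) to leave Ahead
	 * mode towards SyncSource. */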
5057 if (mdev->state.conn == C_AHEAD &&
5058 atomic_read(&mdev->ap_in_flight) == 0 &&
36baf611 5059 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
370a43e7
PR
5060 mdev->start_resync_timer.expires = jiffies + HZ;
5061 add_timer(&mdev->start_resync_timer);
c4752ef1
PR
5062 }
5063
2735a594 5064 return 0;
b411b363
PR
5065}
5066
1952e916 5067static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5068{
1952e916 5069 struct drbd_conf *mdev;
e658983a 5070 struct p_block_ack *p = pi->data;
b411b363
PR
5071 struct drbd_work *w;
5072 sector_t sector;
5073 int size;
5074
1952e916
AG
5075 mdev = vnr_to_mdev(tconn, pi->vnr);
5076 if (!mdev)
2735a594 5077 return -EIO;
1952e916 5078
b411b363
PR
5079 sector = be64_to_cpu(p->sector);
5080 size = be32_to_cpu(p->blksize);
5081
5082 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5083
5084 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
8f7bed77 5085 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 5086 else
8f7bed77 5087 ov_out_of_sync_print(mdev);
b411b363 5088
1d53f09e 5089 if (!get_ldev(mdev))
2735a594 5090 return 0;
1d53f09e 5091
b411b363
PR
5092 drbd_rs_complete_io(mdev, sector);
5093 dec_rs_pending(mdev);
5094
ea5442af
LE
5095 --mdev->ov_left;
5096
5097 /* let's advance progress step marks only for every other megabyte */
5098 if ((mdev->ov_left & 0x200) == 0x200)
5099 drbd_advance_rs_marks(mdev, mdev->ov_left);
5100
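	/* Last reply of this online-verify run: queue w_ov_finished for the
	 * worker; if the allocation fails, finish the resync right here. */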
5101 if (mdev->ov_left == 0) {
b411b363
PR
5102 w = kmalloc(sizeof(*w), GFP_NOIO);
5103 if (w) {
5104 w->cb = w_ov_finished;
a21e9298 5105 w->mdev = mdev;
e42325a5 5106 drbd_queue_work_front(&mdev->tconn->data.work, w);
b411b363
PR
5107 } else {
 5108 dev_err(DEV, "kmalloc(w) failed.\n");
8f7bed77 5109 ov_out_of_sync_print(mdev);
b411b363
PR
5110 drbd_resync_finished(mdev);
5111 }
5112 }
1d53f09e 5113 put_ldev(mdev);
2735a594 5114 return 0;
b411b363
PR
5115}
5116
1952e916 5117static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
0ced55a3 5118{
2735a594 5119 return 0;
0ced55a3
PR
5120}
5121
a990be46 5122static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
32862ec7 5123{
082a3439 5124 struct drbd_conf *mdev;
c141ebda 5125 int vnr, not_empty = 0;
32862ec7
PR
5126
5127 do {
5128 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5129 flush_signals(current);
c141ebda
PR
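	/* Pin each mdev with a kref and drop the RCU read lock before calling
	 * drbd_finish_peer_reqs(), which may sleep. */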
5130
5131 rcu_read_lock();
5132 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5133 kref_get(&mdev->kref);
5134 rcu_read_unlock();
d3fcb490 5135 if (drbd_finish_peer_reqs(mdev)) {
c141ebda
PR
5136 kref_put(&mdev->kref, &drbd_minor_destroy);
5137 return 1;
d3fcb490 5138 }
c141ebda
PR
5139 kref_put(&mdev->kref, &drbd_minor_destroy);
5140 rcu_read_lock();
082a3439 5141 }
32862ec7 5142 set_bit(SIGNAL_ASENDER, &tconn->flags);
082a3439
PR
5143
5144 spin_lock_irq(&tconn->req_lock);
c141ebda 5145 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
082a3439
PR
5146 not_empty = !list_empty(&mdev->done_ee);
5147 if (not_empty)
5148 break;
5149 }
5150 spin_unlock_irq(&tconn->req_lock);
c141ebda 5151 rcu_read_unlock();
32862ec7
PR
5152 } while (not_empty);
5153
5154 return 0;
5155}
5156
7201b972
AG
5157struct asender_cmd {
5158 size_t pkt_size;
1952e916 5159 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
7201b972
AG
5160};
5161
5162static struct asender_cmd asender_tbl[] = {
e658983a
AG
5163 [P_PING] = { 0, got_Ping },
5164 [P_PING_ACK] = { 0, got_PingAck },
1952e916
AG
5165 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5166 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5167 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5168 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
5169 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5170 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
5171 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
5172 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5173 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5174 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5175 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
5176 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
5177 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
 5178 [P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
5179 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972
AG
5180};
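/* Dispatch table for the meta-data socket: decode_header() fills pi.cmd, the
 * table supplies the expected payload size and the handler, and drbd_asender()
 * below receives header_size + pkt_size bytes before invoking fn. */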
5181
b411b363
PR
5182int drbd_asender(struct drbd_thread *thi)
5183{
392c8801 5184 struct drbd_tconn *tconn = thi->tconn;
b411b363 5185 struct asender_cmd *cmd = NULL;
77351055 5186 struct packet_info pi;
257d0af6 5187 int rv;
e658983a 5188 void *buf = tconn->meta.rbuf;
b411b363 5189 int received = 0;
52b061a4
AG
5190 unsigned int header_size = drbd_header_size(tconn);
5191 int expect = header_size;
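	/* First collect a full header (header_size bytes); once it is decoded,
	 * expect grows to header_size + cmd->pkt_size for the fixed payload. */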
44ed167d
PR
5192 bool ping_timeout_active = false;
5193 struct net_conf *nc;
bb77d34e 5194 int ping_timeo, tcp_cork, ping_int;
b411b363 5195
b411b363
PR
5196 current->policy = SCHED_RR; /* Make this a realtime task! */
5197 current->rt_priority = 2; /* more important than all other tasks */
5198
e77a0a5c 5199 while (get_t_state(thi) == RUNNING) {
80822284 5200 drbd_thread_current_set_cpu(thi);
44ed167d
PR
5201
5202 rcu_read_lock();
5203 nc = rcu_dereference(tconn->net_conf);
5204 ping_timeo = nc->ping_timeo;
bb77d34e 5205 tcp_cork = nc->tcp_cork;
44ed167d
PR
5206 ping_int = nc->ping_int;
5207 rcu_read_unlock();
5208
32862ec7 5209 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
a17647aa 5210 if (drbd_send_ping(tconn)) {
32862ec7 5211 conn_err(tconn, "drbd_send_ping has failed\n");
841ce241
AG
5212 goto reconnect;
5213 }
44ed167d
PR
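			/* ping_timeo is given in tenths of a second, hence * HZ / 10;
			 * the normal ping_int timeout is restored once the PingAck
			 * arrives (see below). */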
5214 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5215 ping_timeout_active = true;
b411b363
PR
5216 }
5217
32862ec7
PR
5218 /* TODO: conditionally cork; it may hurt latency if we cork without
5219 much to send */
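		/* Corking batches whatever tconn_finish_peer_reqs() sends on the
		 * meta socket into fewer TCP segments; it is uncorked again below. */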
bb77d34e 5220 if (tcp_cork)
32862ec7 5221 drbd_tcp_cork(tconn->meta.socket);
a990be46
AG
5222 if (tconn_finish_peer_reqs(tconn)) {
5223 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
32862ec7 5224 goto reconnect;
082a3439 5225 }
b411b363 5226 /* but unconditionally uncork unless disabled */
bb77d34e 5227 if (tcp_cork)
32862ec7 5228 drbd_tcp_uncork(tconn->meta.socket);
b411b363
PR
5229
5230 /* short circuit, recv_msg would return EINTR anyways. */
5231 if (signal_pending(current))
5232 continue;
5233
32862ec7
PR
5234 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5235 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363
PR
5236
5237 flush_signals(current);
5238
5239 /* Note:
5240 * -EINTR (on meta) we got a signal
5241 * -EAGAIN (on meta) rcvtimeo expired
5242 * -ECONNRESET other side closed the connection
5243 * -ERESTARTSYS (on data) we got a signal
5244 * rv < 0 other than above: unexpected error!
5245 * rv == expected: full header or command
5246 * rv < expected: "woken" by signal during receive
5247 * rv == 0 : "connection shut down by peer"
5248 */
5249 if (likely(rv > 0)) {
5250 received += rv;
5251 buf += rv;
5252 } else if (rv == 0) {
32862ec7 5253 conn_err(tconn, "meta connection shut down by peer.\n");
b411b363
PR
5254 goto reconnect;
5255 } else if (rv == -EAGAIN) {
cb6518cb
LE
5256 /* If the data socket received something meanwhile,
5257 * that is good enough: peer is still alive. */
32862ec7
PR
5258 if (time_after(tconn->last_received,
5259 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5260 continue;
f36af18c 5261 if (ping_timeout_active) {
32862ec7 5262 conn_err(tconn, "PingAck did not arrive in time.\n");
b411b363
PR
5263 goto reconnect;
5264 }
32862ec7 5265 set_bit(SEND_PING, &tconn->flags);
b411b363
PR
5266 continue;
5267 } else if (rv == -EINTR) {
5268 continue;
5269 } else {
32862ec7 5270 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5271 goto reconnect;
5272 }
5273
5274 if (received == expect && cmd == NULL) {
e658983a 5275 if (decode_header(tconn, tconn->meta.rbuf, &pi))
b411b363 5276 goto reconnect;
1952e916 5277 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
2fcb8f30
AG
 5278 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
 5279 cmdname(pi.cmd), pi.cmd);
b411b363
PR
 5280 goto disconnect;
 5281 }
7201b972 5282 cmd = &asender_tbl[pi.cmd];
e658983a 5283 expect = header_size + cmd->pkt_size;
52b061a4 5284 if (pi.size != expect - header_size) {
32862ec7 5285 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5286 pi.cmd, pi.size);
b411b363 5287 goto reconnect;
257d0af6 5288 }
b411b363
PR
5289 }
5290 if (received == expect) {
2735a594 5291 bool err;
a4fbda8e 5292
2735a594
AG
5293 err = cmd->fn(tconn, &pi);
5294 if (err) {
1952e916 5295 conn_err(tconn, "%pf failed\n", cmd->fn);
b411b363 5296 goto reconnect;
1952e916 5297 }
b411b363 5298
a4fbda8e
PR
5299 tconn->last_received = jiffies;
5300
44ed167d
PR
5301 if (cmd == &asender_tbl[P_PING_ACK]) {
5302 /* restore idle timeout */
5303 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5304 ping_timeout_active = false;
5305 }
f36af18c 5306
e658983a 5307 buf = tconn->meta.rbuf;
b411b363 5308 received = 0;
52b061a4 5309 expect = header_size;
b411b363
PR
5310 cmd = NULL;
5311 }
5312 }
5313
5314 if (0) {
5315reconnect:
bbeb641c 5316 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
5317 }
5318 if (0) {
5319disconnect:
bbeb641c 5320 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5321 }
32862ec7 5322 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363 5323
32862ec7 5324 conn_info(tconn, "asender terminated\n");
b411b363
PR
5325
5326 return 0;
5327}