drbd: refactor use of first_peer_device()
[deliverable/linux.git] / drivers / block / drbd / drbd_receiver.c
CommitLineData
b411b363
PR
1/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
b411b363
PR
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
b411b363
PR
31#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
b411b363
PR
39#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
b411b363
PR
44#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
a3603a6e 47#include "drbd_protocol.h"
b411b363 48#include "drbd_req.h"
b411b363
PR
49#include "drbd_vli.h"
50
20c68fde
LE
51#define PRO_FEATURES (FF_TRIM)
52
77351055
PR
53struct packet_info {
54 enum drbd_packet cmd;
e2857216
AG
55 unsigned int size;
56 unsigned int vnr;
e658983a 57 void *data;
77351055
PR
58};
59
b411b363
PR
60enum finish_epoch {
61 FE_STILL_LIVE,
62 FE_DESTROYED,
63 FE_RECYCLED,
64};
65
bde89a9e
AG
66static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
69a22773 68static int drbd_disconnected(struct drbd_peer_device *);
a0fb3c47 69static void conn_wait_active_ee_empty(struct drbd_connection *connection);
bde89a9e 70static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
99920dc5 71static int e_end_block(struct drbd_work *, int);
b411b363 72
b411b363
PR
73
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
45bb912b
LE
76/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
81/* If at least n pages are linked at head, get n pages off.
82 * Otherwise, don't modify head, and return NULL.
83 * Locking is the responsibility of the caller.
84 */
85static struct page *page_chain_del(struct page **head, int n)
86{
87 struct page *page;
88 struct page *tmp;
89
90 BUG_ON(!n);
91 BUG_ON(!head);
92
93 page = *head;
23ce4227
PR
94
95 if (!page)
96 return NULL;
97
45bb912b
LE
98 while (page) {
99 tmp = page_chain_next(page);
100 if (--n == 0)
101 break; /* found sufficient pages */
102 if (tmp == NULL)
103 /* insufficient pages, don't use any of them. */
104 return NULL;
105 page = tmp;
106 }
107
108 /* add end of list marker for the returned list */
109 set_page_private(page, 0);
110 /* actual return value, and adjustment of head */
111 page = *head;
112 *head = tmp;
113 return page;
114}
115
116/* may be used outside of locks to find the tail of a (usually short)
117 * "private" page chain, before adding it back to a global chain head
118 * with page_chain_add() under a spinlock. */
119static struct page *page_chain_tail(struct page *page, int *len)
120{
121 struct page *tmp;
122 int i = 1;
123 while ((tmp = page_chain_next(page)))
124 ++i, page = tmp;
125 if (len)
126 *len = i;
127 return page;
128}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
141static void page_chain_add(struct page **head,
142 struct page *chain_first, struct page *chain_last)
143{
144#if 1
145 struct page *tmp;
146 tmp = page_chain_tail(chain_first, NULL);
147 BUG_ON(tmp != chain_last);
148#endif
149
150 /* add chain to head */
151 set_page_private(chain_last, (unsigned long)*head);
152 *head = chain_first;
153}
154
b30ab791 155static struct page *__drbd_alloc_pages(struct drbd_device *device,
18c2d522 156 unsigned int number)
b411b363
PR
157{
158 struct page *page = NULL;
45bb912b 159 struct page *tmp = NULL;
18c2d522 160 unsigned int i = 0;
b411b363
PR
161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
45bb912b 164 if (drbd_pp_vacant >= number) {
b411b363 165 spin_lock(&drbd_pp_lock);
45bb912b
LE
166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
b411b363 169 spin_unlock(&drbd_pp_lock);
45bb912b
LE
170 if (page)
171 return page;
b411b363 172 }
45bb912b 173
b411b363
PR
174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
45bb912b
LE
177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
c37c8ecf 189 * No need to jump around here, drbd_alloc_pages will retry this
45bb912b
LE
190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
b411b363
PR
199}
200
b30ab791 201static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
a990be46 202 struct list_head *to_be_freed)
b411b363 203{
a8cd15ba 204 struct drbd_peer_request *peer_req, *tmp;
b411b363
PR
205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
a8cd15ba 211 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
045417f7 212 if (drbd_peer_req_has_active_page(peer_req))
b411b363 213 break;
a8cd15ba 214 list_move(&peer_req->w.list, to_be_freed);
b411b363
PR
215 }
216}
217
b30ab791 218static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
b411b363
PR
219{
220 LIST_HEAD(reclaimed);
db830c46 221 struct drbd_peer_request *peer_req, *t;
b411b363 222
0500813f 223 spin_lock_irq(&device->resource->req_lock);
b30ab791 224 reclaim_finished_net_peer_reqs(device, &reclaimed);
0500813f 225 spin_unlock_irq(&device->resource->req_lock);
b411b363 226
a8cd15ba 227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
b30ab791 228 drbd_free_net_peer_req(device, peer_req);
b411b363
PR
229}
230
231/**
c37c8ecf 232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
b30ab791 233 * @device: DRBD device.
45bb912b
LE
234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
236 *
237 * Tries to allocate number pages, first from our own page pool, then from
0e49d7b0 238 * the kernel.
45bb912b 239 * Possibly retry until DRBD frees sufficient pages somewhere else.
b411b363 240 *
0e49d7b0
LE
241 * If this allocation would exceed the max_buffers setting, we throttle
242 * allocation (schedule_timeout) to give the system some room to breathe.
243 *
244 * We do not use max-buffers as hard limit, because it could lead to
245 * congestion and further to a distributed deadlock during online-verify or
246 * (checksum based) resync, if the max-buffers, socket buffer sizes and
247 * resync-rate settings are mis-configured.
248 *
45bb912b 249 * Returns a page chain linked via page->private.
b411b363 250 */
69a22773 251struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
c37c8ecf 252 bool retry)
b411b363 253{
69a22773 254 struct drbd_device *device = peer_device->device;
b411b363 255 struct page *page = NULL;
44ed167d 256 struct net_conf *nc;
b411b363 257 DEFINE_WAIT(wait);
0e49d7b0 258 unsigned int mxb;
b411b363 259
44ed167d 260 rcu_read_lock();
69a22773 261 nc = rcu_dereference(peer_device->connection->net_conf);
44ed167d
PR
262 mxb = nc ? nc->max_buffers : 1000000;
263 rcu_read_unlock();
264
b30ab791
AG
265 if (atomic_read(&device->pp_in_use) < mxb)
266 page = __drbd_alloc_pages(device, number);
b411b363 267
45bb912b 268 while (page == NULL) {
b411b363
PR
269 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
270
b30ab791 271 drbd_kick_lo_and_reclaim_net(device);
b411b363 272
b30ab791
AG
273 if (atomic_read(&device->pp_in_use) < mxb) {
274 page = __drbd_alloc_pages(device, number);
b411b363
PR
275 if (page)
276 break;
277 }
278
279 if (!retry)
280 break;
281
282 if (signal_pending(current)) {
d0180171 283 drbd_warn(device, "drbd_alloc_pages interrupted!\n");
b411b363
PR
284 break;
285 }
286
0e49d7b0
LE
287 if (schedule_timeout(HZ/10) == 0)
288 mxb = UINT_MAX;
b411b363
PR
289 }
290 finish_wait(&drbd_pp_wait, &wait);
291
45bb912b 292 if (page)
b30ab791 293 atomic_add(number, &device->pp_in_use);
b411b363
PR
294 return page;
295}
296
c37c8ecf 297/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
0500813f 298 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
45bb912b
LE
299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */
b30ab791 301static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
b411b363 302{
b30ab791 303 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
b411b363 304 int i;
435f0740 305
a73ff323
LE
306 if (page == NULL)
307 return;
308
81a5d60e 309 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
45bb912b
LE
310 i = page_chain_free(page);
311 else {
312 struct page *tmp;
313 tmp = page_chain_tail(page, &i);
314 spin_lock(&drbd_pp_lock);
315 page_chain_add(&drbd_pp_pool, page, tmp);
316 drbd_pp_vacant += i;
317 spin_unlock(&drbd_pp_lock);
b411b363 318 }
435f0740 319 i = atomic_sub_return(i, a);
45bb912b 320 if (i < 0)
d0180171 321 drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
435f0740 322 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
b411b363
PR
323 wake_up(&drbd_pp_wait);
324}
325
326/*
327You need to hold the req_lock:
328 _drbd_wait_ee_list_empty()
329
330You must not have the req_lock:
3967deb1 331 drbd_free_peer_req()
0db55363 332 drbd_alloc_peer_req()
7721f567 333 drbd_free_peer_reqs()
b411b363 334 drbd_ee_fix_bhs()
a990be46 335 drbd_finish_peer_reqs()
b411b363
PR
336 drbd_clear_done_ee()
337 drbd_wait_ee_list_empty()
338*/
339
f6ffca9f 340struct drbd_peer_request *
69a22773 341drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
a0fb3c47 342 unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
b411b363 343{
69a22773 344 struct drbd_device *device = peer_device->device;
db830c46 345 struct drbd_peer_request *peer_req;
a73ff323 346 struct page *page = NULL;
45bb912b 347 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
b411b363 348
b30ab791 349 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
b411b363
PR
350 return NULL;
351
db830c46
AG
352 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
353 if (!peer_req) {
b411b363 354 if (!(gfp_mask & __GFP_NOWARN))
d0180171 355 drbd_err(device, "%s: allocation failed\n", __func__);
b411b363
PR
356 return NULL;
357 }
358
a0fb3c47 359 if (has_payload && data_size) {
69a22773 360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
a73ff323
LE
361 if (!page)
362 goto fail;
363 }
b411b363 364
db830c46
AG
365 drbd_clear_interval(&peer_req->i);
366 peer_req->i.size = data_size;
367 peer_req->i.sector = sector;
368 peer_req->i.local = false;
369 peer_req->i.waiting = false;
370
371 peer_req->epoch = NULL;
a8cd15ba 372 peer_req->peer_device = peer_device;
db830c46
AG
373 peer_req->pages = page;
374 atomic_set(&peer_req->pending_bios, 0);
375 peer_req->flags = 0;
9a8e7753
AG
376 /*
377 * The block_id is opaque to the receiver. It is not endianness
378 * converted, and sent back to the sender unchanged.
379 */
db830c46 380 peer_req->block_id = id;
b411b363 381
db830c46 382 return peer_req;
b411b363 383
45bb912b 384 fail:
db830c46 385 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
386 return NULL;
387}
388
b30ab791 389void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
f6ffca9f 390 int is_net)
b411b363 391{
db830c46
AG
392 if (peer_req->flags & EE_HAS_DIGEST)
393 kfree(peer_req->digest);
b30ab791 394 drbd_free_pages(device, peer_req->pages, is_net);
0b0ba1ef
AG
395 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
396 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
db830c46 397 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
398}
399
b30ab791 400int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
b411b363
PR
401{
402 LIST_HEAD(work_list);
db830c46 403 struct drbd_peer_request *peer_req, *t;
b411b363 404 int count = 0;
b30ab791 405 int is_net = list == &device->net_ee;
b411b363 406
0500813f 407 spin_lock_irq(&device->resource->req_lock);
b411b363 408 list_splice_init(list, &work_list);
0500813f 409 spin_unlock_irq(&device->resource->req_lock);
b411b363 410
a8cd15ba 411 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
b30ab791 412 __drbd_free_peer_req(device, peer_req, is_net);
b411b363
PR
413 count++;
414 }
415 return count;
416}
417
b411b363 418/*
a990be46 419 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
b411b363 420 */
b30ab791 421static int drbd_finish_peer_reqs(struct drbd_device *device)
b411b363
PR
422{
423 LIST_HEAD(work_list);
424 LIST_HEAD(reclaimed);
db830c46 425 struct drbd_peer_request *peer_req, *t;
e2b3032b 426 int err = 0;
b411b363 427
0500813f 428 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
429 reclaim_finished_net_peer_reqs(device, &reclaimed);
430 list_splice_init(&device->done_ee, &work_list);
0500813f 431 spin_unlock_irq(&device->resource->req_lock);
b411b363 432
a8cd15ba 433 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
b30ab791 434 drbd_free_net_peer_req(device, peer_req);
b411b363
PR
435
436 /* possible callbacks here:
d4dabbe2 437 * e_end_block, and e_end_resync_block, e_send_superseded.
b411b363
PR
438 * all ignore the last argument.
439 */
a8cd15ba 440 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
e2b3032b
AG
441 int err2;
442
b411b363 443 /* list_del not necessary, next/prev members not touched */
a8cd15ba 444 err2 = peer_req->w.cb(&peer_req->w, !!err);
e2b3032b
AG
445 if (!err)
446 err = err2;
b30ab791 447 drbd_free_peer_req(device, peer_req);
b411b363 448 }
b30ab791 449 wake_up(&device->ee_wait);
b411b363 450
e2b3032b 451 return err;
b411b363
PR
452}
453
b30ab791 454static void _drbd_wait_ee_list_empty(struct drbd_device *device,
d4da1537 455 struct list_head *head)
b411b363
PR
456{
457 DEFINE_WAIT(wait);
458
459 /* avoids spin_lock/unlock
460 * and calling prepare_to_wait in the fast path */
461 while (!list_empty(head)) {
b30ab791 462 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
0500813f 463 spin_unlock_irq(&device->resource->req_lock);
7eaceacc 464 io_schedule();
b30ab791 465 finish_wait(&device->ee_wait, &wait);
0500813f 466 spin_lock_irq(&device->resource->req_lock);
b411b363
PR
467 }
468}
469
b30ab791 470static void drbd_wait_ee_list_empty(struct drbd_device *device,
d4da1537 471 struct list_head *head)
b411b363 472{
0500813f 473 spin_lock_irq(&device->resource->req_lock);
b30ab791 474 _drbd_wait_ee_list_empty(device, head);
0500813f 475 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
476}
477
dbd9eea0 478static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
b411b363 479{
b411b363
PR
480 struct kvec iov = {
481 .iov_base = buf,
482 .iov_len = size,
483 };
484 struct msghdr msg = {
b411b363
PR
485 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
486 };
f730c848 487 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
b411b363
PR
488}
489
bde89a9e 490static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
b411b363 491{
b411b363
PR
492 int rv;
493
bde89a9e 494 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
b411b363 495
dbd0820c
PR
496 if (rv < 0) {
497 if (rv == -ECONNRESET)
1ec861eb 498 drbd_info(connection, "sock was reset by peer\n");
dbd0820c 499 else if (rv != -ERESTARTSYS)
1ec861eb 500 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
dbd0820c 501 } else if (rv == 0) {
bde89a9e 502 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
503 long t;
504 rcu_read_lock();
bde89a9e 505 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
506 rcu_read_unlock();
507
bde89a9e 508 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
b66623e3 509
599377ac
PR
510 if (t)
511 goto out;
512 }
1ec861eb 513 drbd_info(connection, "sock was shut down by peer\n");
599377ac
PR
514 }
515
b411b363 516 if (rv != size)
bde89a9e 517 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
b411b363 518
599377ac 519out:
b411b363
PR
520 return rv;
521}
522
bde89a9e 523static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
c6967746
AG
524{
525 int err;
526
bde89a9e 527 err = drbd_recv(connection, buf, size);
c6967746
AG
528 if (err != size) {
529 if (err >= 0)
530 err = -EIO;
531 } else
532 err = 0;
533 return err;
534}
535
bde89a9e 536static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
a5c31904
AG
537{
538 int err;
539
bde89a9e 540 err = drbd_recv_all(connection, buf, size);
a5c31904 541 if (err && !signal_pending(current))
1ec861eb 542 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
a5c31904
AG
543 return err;
544}
545
5dbf1673
LE
546/* quoting tcp(7):
547 * On individual connections, the socket buffer size must be set prior to the
548 * listen(2) or connect(2) calls in order to have it take effect.
549 * This is our wrapper to do so.
550 */
551static void drbd_setbufsize(struct socket *sock, unsigned int snd,
552 unsigned int rcv)
553{
554 /* open coded SO_SNDBUF, SO_RCVBUF */
555 if (snd) {
556 sock->sk->sk_sndbuf = snd;
557 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
558 }
559 if (rcv) {
560 sock->sk->sk_rcvbuf = rcv;
561 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
562 }
563}
564
bde89a9e 565static struct socket *drbd_try_connect(struct drbd_connection *connection)
b411b363
PR
566{
567 const char *what;
568 struct socket *sock;
569 struct sockaddr_in6 src_in6;
44ed167d
PR
570 struct sockaddr_in6 peer_in6;
571 struct net_conf *nc;
572 int err, peer_addr_len, my_addr_len;
69ef82de 573 int sndbuf_size, rcvbuf_size, connect_int;
b411b363
PR
574 int disconnect_on_error = 1;
575
44ed167d 576 rcu_read_lock();
bde89a9e 577 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
578 if (!nc) {
579 rcu_read_unlock();
b411b363 580 return NULL;
44ed167d 581 }
44ed167d
PR
582 sndbuf_size = nc->sndbuf_size;
583 rcvbuf_size = nc->rcvbuf_size;
69ef82de 584 connect_int = nc->connect_int;
089c075d 585 rcu_read_unlock();
44ed167d 586
bde89a9e
AG
587 my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
588 memcpy(&src_in6, &connection->my_addr, my_addr_len);
44ed167d 589
bde89a9e 590 if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
44ed167d
PR
591 src_in6.sin6_port = 0;
592 else
593 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
594
bde89a9e
AG
595 peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
596 memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
b411b363
PR
597
598 what = "sock_create_kern";
44ed167d
PR
599 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
600 SOCK_STREAM, IPPROTO_TCP, &sock);
b411b363
PR
601 if (err < 0) {
602 sock = NULL;
603 goto out;
604 }
605
606 sock->sk->sk_rcvtimeo =
69ef82de 607 sock->sk->sk_sndtimeo = connect_int * HZ;
44ed167d 608 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
b411b363
PR
609
610 /* explicitly bind to the configured IP as source IP
611 * for the outgoing connections.
612 * This is needed for multihomed hosts and to be
613 * able to use lo: interfaces for drbd.
614 * Make sure to use 0 as port number, so linux selects
615 * a free one dynamically.
616 */
b411b363 617 what = "bind before connect";
44ed167d 618 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
b411b363
PR
619 if (err < 0)
620 goto out;
621
622 /* connect may fail, peer not yet available.
623 * stay C_WF_CONNECTION, don't go Disconnecting! */
624 disconnect_on_error = 0;
625 what = "connect";
44ed167d 626 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
b411b363
PR
627
628out:
629 if (err < 0) {
630 if (sock) {
631 sock_release(sock);
632 sock = NULL;
633 }
634 switch (-err) {
635 /* timeout, busy, signal pending */
636 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
637 case EINTR: case ERESTARTSYS:
638 /* peer not (yet) available, network problem */
639 case ECONNREFUSED: case ENETUNREACH:
640 case EHOSTDOWN: case EHOSTUNREACH:
641 disconnect_on_error = 0;
642 break;
643 default:
1ec861eb 644 drbd_err(connection, "%s failed, err = %d\n", what, err);
b411b363
PR
645 }
646 if (disconnect_on_error)
bde89a9e 647 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 648 }
44ed167d 649
b411b363
PR
650 return sock;
651}
652
7a426fd8 653struct accept_wait_data {
bde89a9e 654 struct drbd_connection *connection;
7a426fd8
PR
655 struct socket *s_listen;
656 struct completion door_bell;
657 void (*original_sk_state_change)(struct sock *sk);
658
659};
660
715306f6 661static void drbd_incoming_connection(struct sock *sk)
7a426fd8
PR
662{
663 struct accept_wait_data *ad = sk->sk_user_data;
715306f6 664 void (*state_change)(struct sock *sk);
7a426fd8 665
715306f6
AG
666 state_change = ad->original_sk_state_change;
667 if (sk->sk_state == TCP_ESTABLISHED)
668 complete(&ad->door_bell);
669 state_change(sk);
7a426fd8
PR
670}
671
bde89a9e 672static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
b411b363 673{
1f3e509b 674 int err, sndbuf_size, rcvbuf_size, my_addr_len;
44ed167d 675 struct sockaddr_in6 my_addr;
1f3e509b 676 struct socket *s_listen;
44ed167d 677 struct net_conf *nc;
b411b363
PR
678 const char *what;
679
44ed167d 680 rcu_read_lock();
bde89a9e 681 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
682 if (!nc) {
683 rcu_read_unlock();
7a426fd8 684 return -EIO;
44ed167d 685 }
44ed167d
PR
686 sndbuf_size = nc->sndbuf_size;
687 rcvbuf_size = nc->rcvbuf_size;
44ed167d 688 rcu_read_unlock();
b411b363 689
bde89a9e
AG
690 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
691 memcpy(&my_addr, &connection->my_addr, my_addr_len);
b411b363
PR
692
693 what = "sock_create_kern";
44ed167d 694 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
1f3e509b 695 SOCK_STREAM, IPPROTO_TCP, &s_listen);
b411b363
PR
696 if (err) {
697 s_listen = NULL;
698 goto out;
699 }
700
98683650 701 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
44ed167d 702 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
b411b363
PR
703
704 what = "bind before listen";
44ed167d 705 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
b411b363
PR
706 if (err < 0)
707 goto out;
708
7a426fd8
PR
709 ad->s_listen = s_listen;
710 write_lock_bh(&s_listen->sk->sk_callback_lock);
711 ad->original_sk_state_change = s_listen->sk->sk_state_change;
715306f6 712 s_listen->sk->sk_state_change = drbd_incoming_connection;
7a426fd8
PR
713 s_listen->sk->sk_user_data = ad;
714 write_unlock_bh(&s_listen->sk->sk_callback_lock);
b411b363 715
2820fd39
PR
716 what = "listen";
717 err = s_listen->ops->listen(s_listen, 5);
718 if (err < 0)
719 goto out;
720
7a426fd8 721 return 0;
b411b363
PR
722out:
723 if (s_listen)
724 sock_release(s_listen);
725 if (err < 0) {
726 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
1ec861eb 727 drbd_err(connection, "%s failed, err = %d\n", what, err);
bde89a9e 728 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
729 }
730 }
b411b363 731
7a426fd8 732 return -EIO;
b411b363
PR
733}
734
715306f6 735static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
b411b363 736{
715306f6
AG
737 write_lock_bh(&sk->sk_callback_lock);
738 sk->sk_state_change = ad->original_sk_state_change;
739 sk->sk_user_data = NULL;
740 write_unlock_bh(&sk->sk_callback_lock);
b411b363
PR
741}
742
bde89a9e 743static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
b411b363 744{
1f3e509b
PR
745 int timeo, connect_int, err = 0;
746 struct socket *s_estab = NULL;
1f3e509b
PR
747 struct net_conf *nc;
748
749 rcu_read_lock();
bde89a9e 750 nc = rcu_dereference(connection->net_conf);
1f3e509b
PR
751 if (!nc) {
752 rcu_read_unlock();
753 return NULL;
754 }
755 connect_int = nc->connect_int;
756 rcu_read_unlock();
757
758 timeo = connect_int * HZ;
38b682b2
AM
759 /* 28.5% random jitter */
760 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
1f3e509b 761
7a426fd8
PR
762 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
763 if (err <= 0)
764 return NULL;
b411b363 765
7a426fd8 766 err = kernel_accept(ad->s_listen, &s_estab, 0);
b411b363
PR
767 if (err < 0) {
768 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
1ec861eb 769 drbd_err(connection, "accept failed, err = %d\n", err);
bde89a9e 770 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
771 }
772 }
b411b363 773
715306f6
AG
774 if (s_estab)
775 unregister_state_change(s_estab->sk, ad);
b411b363 776
b411b363
PR
777 return s_estab;
778}
b411b363 779
bde89a9e 780static int decode_header(struct drbd_connection *, void *, struct packet_info *);
b411b363 781
bde89a9e 782static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
9f5bdc33
AG
783 enum drbd_packet cmd)
784{
bde89a9e 785 if (!conn_prepare_command(connection, sock))
9f5bdc33 786 return -EIO;
bde89a9e 787 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
b411b363
PR
788}
789
bde89a9e 790static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
b411b363 791{
bde89a9e 792 unsigned int header_size = drbd_header_size(connection);
9f5bdc33
AG
793 struct packet_info pi;
794 int err;
b411b363 795
bde89a9e 796 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
9f5bdc33
AG
797 if (err != header_size) {
798 if (err >= 0)
799 err = -EIO;
800 return err;
801 }
bde89a9e 802 err = decode_header(connection, connection->data.rbuf, &pi);
9f5bdc33
AG
803 if (err)
804 return err;
805 return pi.cmd;
b411b363
PR
806}
807
808/**
809 * drbd_socket_okay() - Free the socket if its connection is not okay
b411b363
PR
810 * @sock: pointer to the pointer to the socket.
811 */
dbd9eea0 812static int drbd_socket_okay(struct socket **sock)
b411b363
PR
813{
814 int rr;
815 char tb[4];
816
817 if (!*sock)
81e84650 818 return false;
b411b363 819
dbd9eea0 820 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
b411b363
PR
821
822 if (rr > 0 || rr == -EAGAIN) {
81e84650 823 return true;
b411b363
PR
824 } else {
825 sock_release(*sock);
826 *sock = NULL;
81e84650 827 return false;
b411b363
PR
828 }
829}
2325eb66
PR
830/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */
69a22773 832int drbd_connected(struct drbd_peer_device *peer_device)
907599e0 833{
69a22773 834 struct drbd_device *device = peer_device->device;
0829f5ed 835 int err;
907599e0 836
b30ab791
AG
837 atomic_set(&device->packet_seq, 0);
838 device->peer_seq = 0;
907599e0 839
69a22773
AG
840 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
841 &peer_device->connection->cstate_mutex :
b30ab791 842 &device->own_state_mutex;
8410da8f 843
69a22773 844 err = drbd_send_sync_param(peer_device);
0829f5ed 845 if (!err)
69a22773 846 err = drbd_send_sizes(peer_device, 0, 0);
0829f5ed 847 if (!err)
69a22773 848 err = drbd_send_uuids(peer_device);
0829f5ed 849 if (!err)
69a22773 850 err = drbd_send_current_state(peer_device);
b30ab791
AG
851 clear_bit(USE_DEGR_WFC_T, &device->flags);
852 clear_bit(RESIZE_PENDING, &device->flags);
853 atomic_set(&device->ap_in_flight, 0);
854 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
0829f5ed 855 return err;
907599e0 856}
b411b363
PR
857
858/*
859 * return values:
860 * 1 yes, we have a valid connection
861 * 0 oops, did not work out, please try again
862 * -1 peer talks different language,
863 * no point in trying again, please go standalone.
864 * -2 We do not have a network config...
865 */
bde89a9e 866static int conn_connect(struct drbd_connection *connection)
b411b363 867{
7da35862 868 struct drbd_socket sock, msock;
c06ece6b 869 struct drbd_peer_device *peer_device;
44ed167d 870 struct net_conf *nc;
92f14951 871 int vnr, timeout, h, ok;
08b165ba 872 bool discard_my_data;
197296ff 873 enum drbd_state_rv rv;
7a426fd8 874 struct accept_wait_data ad = {
bde89a9e 875 .connection = connection,
7a426fd8
PR
876 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
877 };
b411b363 878
bde89a9e
AG
879 clear_bit(DISCONNECT_SENT, &connection->flags);
880 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
b411b363
PR
881 return -2;
882
7da35862 883 mutex_init(&sock.mutex);
bde89a9e
AG
884 sock.sbuf = connection->data.sbuf;
885 sock.rbuf = connection->data.rbuf;
7da35862
PR
886 sock.socket = NULL;
887 mutex_init(&msock.mutex);
bde89a9e
AG
888 msock.sbuf = connection->meta.sbuf;
889 msock.rbuf = connection->meta.rbuf;
7da35862
PR
890 msock.socket = NULL;
891
0916e0e3 892 /* Assume that the peer only understands protocol 80 until we know better. */
bde89a9e 893 connection->agreed_pro_version = 80;
b411b363 894
bde89a9e 895 if (prepare_listen_socket(connection, &ad))
7a426fd8 896 return 0;
b411b363
PR
897
898 do {
2bf89621 899 struct socket *s;
b411b363 900
bde89a9e 901 s = drbd_try_connect(connection);
b411b363 902 if (s) {
7da35862
PR
903 if (!sock.socket) {
904 sock.socket = s;
bde89a9e 905 send_first_packet(connection, &sock, P_INITIAL_DATA);
7da35862 906 } else if (!msock.socket) {
bde89a9e 907 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 908 msock.socket = s;
bde89a9e 909 send_first_packet(connection, &msock, P_INITIAL_META);
b411b363 910 } else {
1ec861eb 911 drbd_err(connection, "Logic error in conn_connect()\n");
b411b363
PR
912 goto out_release_sockets;
913 }
914 }
915
7da35862
PR
916 if (sock.socket && msock.socket) {
917 rcu_read_lock();
bde89a9e 918 nc = rcu_dereference(connection->net_conf);
7da35862
PR
919 timeout = nc->ping_timeo * HZ / 10;
920 rcu_read_unlock();
921 schedule_timeout_interruptible(timeout);
922 ok = drbd_socket_okay(&sock.socket);
923 ok = drbd_socket_okay(&msock.socket) && ok;
b411b363
PR
924 if (ok)
925 break;
926 }
927
928retry:
bde89a9e 929 s = drbd_wait_for_connect(connection, &ad);
b411b363 930 if (s) {
bde89a9e 931 int fp = receive_first_packet(connection, s);
7da35862
PR
932 drbd_socket_okay(&sock.socket);
933 drbd_socket_okay(&msock.socket);
92f14951 934 switch (fp) {
e5d6f33a 935 case P_INITIAL_DATA:
7da35862 936 if (sock.socket) {
1ec861eb 937 drbd_warn(connection, "initial packet S crossed\n");
7da35862 938 sock_release(sock.socket);
80c6eed4
PR
939 sock.socket = s;
940 goto randomize;
b411b363 941 }
7da35862 942 sock.socket = s;
b411b363 943 break;
e5d6f33a 944 case P_INITIAL_META:
bde89a9e 945 set_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 946 if (msock.socket) {
1ec861eb 947 drbd_warn(connection, "initial packet M crossed\n");
7da35862 948 sock_release(msock.socket);
80c6eed4
PR
949 msock.socket = s;
950 goto randomize;
b411b363 951 }
7da35862 952 msock.socket = s;
b411b363
PR
953 break;
954 default:
1ec861eb 955 drbd_warn(connection, "Error receiving initial packet\n");
b411b363 956 sock_release(s);
80c6eed4 957randomize:
38b682b2 958 if (prandom_u32() & 1)
b411b363
PR
959 goto retry;
960 }
961 }
962
bde89a9e 963 if (connection->cstate <= C_DISCONNECTING)
b411b363
PR
964 goto out_release_sockets;
965 if (signal_pending(current)) {
966 flush_signals(current);
967 smp_rmb();
bde89a9e 968 if (get_t_state(&connection->receiver) == EXITING)
b411b363
PR
969 goto out_release_sockets;
970 }
971
b666dbf8
PR
972 ok = drbd_socket_okay(&sock.socket);
973 ok = drbd_socket_okay(&msock.socket) && ok;
974 } while (!ok);
b411b363 975
7a426fd8
PR
976 if (ad.s_listen)
977 sock_release(ad.s_listen);
b411b363 978
98683650
PR
979 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
980 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
b411b363 981
7da35862
PR
982 sock.socket->sk->sk_allocation = GFP_NOIO;
983 msock.socket->sk->sk_allocation = GFP_NOIO;
b411b363 984
7da35862
PR
985 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
986 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
b411b363 987
b411b363 988 /* NOT YET ...
bde89a9e 989 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
7da35862 990 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
6038178e 991 * first set it to the P_CONNECTION_FEATURES timeout,
b411b363 992 * which we set to 4x the configured ping_timeout. */
44ed167d 993 rcu_read_lock();
bde89a9e 994 nc = rcu_dereference(connection->net_conf);
44ed167d 995
7da35862
PR
996 sock.socket->sk->sk_sndtimeo =
997 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
b411b363 998
7da35862 999 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
44ed167d 1000 timeout = nc->timeout * HZ / 10;
08b165ba 1001 discard_my_data = nc->discard_my_data;
44ed167d 1002 rcu_read_unlock();
b411b363 1003
7da35862 1004 msock.socket->sk->sk_sndtimeo = timeout;
b411b363
PR
1005
1006 /* we don't want delays.
25985edc 1007 * we use TCP_CORK where appropriate, though */
7da35862
PR
1008 drbd_tcp_nodelay(sock.socket);
1009 drbd_tcp_nodelay(msock.socket);
b411b363 1010
bde89a9e
AG
1011 connection->data.socket = sock.socket;
1012 connection->meta.socket = msock.socket;
1013 connection->last_received = jiffies;
b411b363 1014
bde89a9e 1015 h = drbd_do_features(connection);
b411b363
PR
1016 if (h <= 0)
1017 return h;
1018
bde89a9e 1019 if (connection->cram_hmac_tfm) {
b30ab791 1020 /* drbd_request_state(device, NS(conn, WFAuth)); */
bde89a9e 1021 switch (drbd_do_auth(connection)) {
b10d96cb 1022 case -1:
1ec861eb 1023 drbd_err(connection, "Authentication of peer failed\n");
b411b363 1024 return -1;
b10d96cb 1025 case 0:
1ec861eb 1026 drbd_err(connection, "Authentication of peer failed, trying again.\n");
b10d96cb 1027 return 0;
b411b363
PR
1028 }
1029 }
1030
bde89a9e
AG
1031 connection->data.socket->sk->sk_sndtimeo = timeout;
1032 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
b411b363 1033
bde89a9e 1034 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
7e2455c1 1035 return -1;
b411b363 1036
31007745
PR
1037 /* Prevent a race between resync-handshake and
1038 * being promoted to Primary.
1039 *
1040 * Grab and release the state mutex, so we know that any current
1041 * drbd_set_role() is finished, and any incoming drbd_set_role
1042 * will see the STATE_SENT flag, and wait for it to be cleared.
1043 */
1044 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1045 mutex_lock(peer_device->device->state_mutex);
1046
bde89a9e 1047 set_bit(STATE_SENT, &connection->flags);
a1096a6e 1048
31007745
PR
1049 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1050 mutex_unlock(peer_device->device->state_mutex);
1051
c141ebda 1052 rcu_read_lock();
c06ece6b
AG
1053 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1054 struct drbd_device *device = peer_device->device;
b30ab791 1055 kref_get(&device->kref);
26ea8f92
AG
1056 rcu_read_unlock();
1057
08b165ba 1058 if (discard_my_data)
b30ab791 1059 set_bit(DISCARD_MY_DATA, &device->flags);
08b165ba 1060 else
b30ab791 1061 clear_bit(DISCARD_MY_DATA, &device->flags);
08b165ba 1062
69a22773 1063 drbd_connected(peer_device);
05a10ec7 1064 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
1065 rcu_read_lock();
1066 }
1067 rcu_read_unlock();
1068
bde89a9e
AG
1069 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1070 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1071 clear_bit(STATE_SENT, &connection->flags);
1e86ac48 1072 return 0;
a1096a6e 1073 }
1e86ac48 1074
bde89a9e 1075 drbd_thread_start(&connection->asender);
b411b363 1076
0500813f 1077 mutex_lock(&connection->resource->conf_update);
08b165ba
PR
1078 /* The discard_my_data flag is a single-shot modifier to the next
1079 * connection attempt, the handshake of which is now well underway.
1080 * No need for rcu style copying of the whole struct
1081 * just to clear a single value. */
bde89a9e 1082 connection->net_conf->discard_my_data = 0;
0500813f 1083 mutex_unlock(&connection->resource->conf_update);
08b165ba 1084
d3fcb490 1085 return h;
b411b363
PR
1086
1087out_release_sockets:
7a426fd8
PR
1088 if (ad.s_listen)
1089 sock_release(ad.s_listen);
7da35862
PR
1090 if (sock.socket)
1091 sock_release(sock.socket);
1092 if (msock.socket)
1093 sock_release(msock.socket);
b411b363
PR
1094 return -1;
1095}
1096
bde89a9e 1097static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
b411b363 1098{
bde89a9e 1099 unsigned int header_size = drbd_header_size(connection);
e658983a 1100
0c8e36d9
AG
1101 if (header_size == sizeof(struct p_header100) &&
1102 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1103 struct p_header100 *h = header;
1104 if (h->pad != 0) {
1ec861eb 1105 drbd_err(connection, "Header padding is not zero\n");
0c8e36d9
AG
1106 return -EINVAL;
1107 }
1108 pi->vnr = be16_to_cpu(h->volume);
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be32_to_cpu(h->length);
1111 } else if (header_size == sizeof(struct p_header95) &&
1112 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
e658983a 1113 struct p_header95 *h = header;
e658983a 1114 pi->cmd = be16_to_cpu(h->command);
b55d84ba
AG
1115 pi->size = be32_to_cpu(h->length);
1116 pi->vnr = 0;
e658983a
AG
1117 } else if (header_size == sizeof(struct p_header80) &&
1118 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1119 struct p_header80 *h = header;
1120 pi->cmd = be16_to_cpu(h->command);
1121 pi->size = be16_to_cpu(h->length);
77351055 1122 pi->vnr = 0;
02918be2 1123 } else {
1ec861eb 1124 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
e658983a 1125 be32_to_cpu(*(__be32 *)header),
bde89a9e 1126 connection->agreed_pro_version);
8172f3e9 1127 return -EINVAL;
b411b363 1128 }
e658983a 1129 pi->data = header + header_size;
8172f3e9 1130 return 0;
257d0af6 1131}
b411b363 1132
bde89a9e 1133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
257d0af6 1134{
bde89a9e 1135 void *buffer = connection->data.rbuf;
69bc7bc3 1136 int err;
257d0af6 1137
bde89a9e 1138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
a5c31904 1139 if (err)
69bc7bc3 1140 return err;
257d0af6 1141
bde89a9e
AG
1142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
b411b363 1144
69bc7bc3 1145 return err;
b411b363
PR
1146}
1147
bde89a9e 1148static void drbd_flush(struct drbd_connection *connection)
b411b363
PR
1149{
1150 int rv;
c06ece6b 1151 struct drbd_peer_device *peer_device;
4b0007c0
PR
1152 int vnr;
1153
e9526580 1154 if (connection->resource->write_ordering >= WO_bdev_flush) {
615e087f 1155 rcu_read_lock();
c06ece6b
AG
1156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1157 struct drbd_device *device = peer_device->device;
1158
b30ab791 1159 if (!get_ldev(device))
615e087f 1160 continue;
b30ab791 1161 kref_get(&device->kref);
615e087f
LE
1162 rcu_read_unlock();
1163
b30ab791 1164 rv = blkdev_issue_flush(device->ldev->backing_bdev,
615e087f
LE
1165 GFP_NOIO, NULL);
1166 if (rv) {
d0180171 1167 drbd_info(device, "local disk flush failed with status %d\n", rv);
615e087f
LE
1168 /* would rather check on EOPNOTSUPP, but that is not reliable.
1169 * don't try again for ANY return value != 0
1170 * if (rv == -EOPNOTSUPP) */
8fe39aac 1171 drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
4b0007c0 1172 }
b30ab791 1173 put_ldev(device);
05a10ec7 1174 kref_put(&device->kref, drbd_destroy_device);
b411b363 1175
615e087f
LE
1176 rcu_read_lock();
1177 if (rv)
1178 break;
b411b363 1179 }
615e087f 1180 rcu_read_unlock();
b411b363 1181 }
b411b363
PR
1182}
1183
1184/**
1185 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
b30ab791 1186 * @device: DRBD device.
b411b363
PR
1187 * @epoch: Epoch object.
1188 * @ev: Epoch event.
1189 */
bde89a9e 1190static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
b411b363
PR
1191 struct drbd_epoch *epoch,
1192 enum epoch_event ev)
1193{
2451fc3b 1194 int epoch_size;
b411b363 1195 struct drbd_epoch *next_epoch;
b411b363
PR
1196 enum finish_epoch rv = FE_STILL_LIVE;
1197
bde89a9e 1198 spin_lock(&connection->epoch_lock);
b411b363
PR
1199 do {
1200 next_epoch = NULL;
b411b363
PR
1201
1202 epoch_size = atomic_read(&epoch->epoch_size);
1203
1204 switch (ev & ~EV_CLEANUP) {
1205 case EV_PUT:
1206 atomic_dec(&epoch->active);
1207 break;
1208 case EV_GOT_BARRIER_NR:
1209 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
b411b363
PR
1210 break;
1211 case EV_BECAME_LAST:
1212 /* nothing to do*/
1213 break;
1214 }
1215
b411b363
PR
1216 if (epoch_size != 0 &&
1217 atomic_read(&epoch->active) == 0 &&
80f9fd55 1218 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
b411b363 1219 if (!(ev & EV_CLEANUP)) {
bde89a9e
AG
1220 spin_unlock(&connection->epoch_lock);
1221 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1222 spin_lock(&connection->epoch_lock);
b411b363 1223 }
9ed57dcb
LE
1224#if 0
1225 /* FIXME: dec unacked on connection, once we have
1226 * something to count pending connection packets in. */
80f9fd55 1227 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
bde89a9e 1228 dec_unacked(epoch->connection);
9ed57dcb 1229#endif
b411b363 1230
bde89a9e 1231 if (connection->current_epoch != epoch) {
b411b363
PR
1232 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1233 list_del(&epoch->list);
1234 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
bde89a9e 1235 connection->epochs--;
b411b363
PR
1236 kfree(epoch);
1237
1238 if (rv == FE_STILL_LIVE)
1239 rv = FE_DESTROYED;
1240 } else {
1241 epoch->flags = 0;
1242 atomic_set(&epoch->epoch_size, 0);
698f9315 1243 /* atomic_set(&epoch->active, 0); is already zero */
b411b363
PR
1244 if (rv == FE_STILL_LIVE)
1245 rv = FE_RECYCLED;
1246 }
1247 }
1248
1249 if (!next_epoch)
1250 break;
1251
1252 epoch = next_epoch;
1253 } while (1);
1254
bde89a9e 1255 spin_unlock(&connection->epoch_lock);
b411b363 1256
b411b363
PR
1257 return rv;
1258}
1259
8fe39aac
PR
1260static enum write_ordering_e
1261max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1262{
1263 struct disk_conf *dc;
1264
1265 dc = rcu_dereference(bdev->disk_conf);
1266
1267 if (wo == WO_bdev_flush && !dc->disk_flushes)
1268 wo = WO_drain_io;
1269 if (wo == WO_drain_io && !dc->disk_drain)
1270 wo = WO_none;
1271
1272 return wo;
1273}
1274
b411b363
PR
1275/**
1276 * drbd_bump_write_ordering() - Fall back to an other write ordering method
bde89a9e 1277 * @connection: DRBD connection.
b411b363
PR
1278 * @wo: Write ordering method to try.
1279 */
8fe39aac
PR
1280void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1281 enum write_ordering_e wo)
b411b363 1282{
e9526580 1283 struct drbd_device *device;
b411b363 1284 enum write_ordering_e pwo;
4b0007c0 1285 int vnr;
b411b363
PR
1286 static char *write_ordering_str[] = {
1287 [WO_none] = "none",
1288 [WO_drain_io] = "drain",
1289 [WO_bdev_flush] = "flush",
b411b363
PR
1290 };
1291
e9526580 1292 pwo = resource->write_ordering;
b411b363 1293 wo = min(pwo, wo);
daeda1cc 1294 rcu_read_lock();
e9526580 1295 idr_for_each_entry(&resource->devices, device, vnr) {
8fe39aac
PR
1296 if (get_ldev(device)) {
1297 wo = max_allowed_wo(device->ldev, wo);
1298 if (device->ldev == bdev)
1299 bdev = NULL;
1300 put_ldev(device);
1301 }
4b0007c0 1302 }
daeda1cc 1303 rcu_read_unlock();
8fe39aac
PR
1304
1305 if (bdev)
1306 wo = max_allowed_wo(bdev, wo);
1307
e9526580
PR
1308 resource->write_ordering = wo;
1309 if (pwo != resource->write_ordering || wo == WO_bdev_flush)
1310 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
b411b363
PR
1311}
1312
45bb912b 1313/**
fbe29dec 1314 * drbd_submit_peer_request()
b30ab791 1315 * @device: DRBD device.
db830c46 1316 * @peer_req: peer request
45bb912b 1317 * @rw: flag field, see bio->bi_rw
10f6d992
LE
1318 *
1319 * May spread the pages to multiple bios,
1320 * depending on bio_add_page restrictions.
1321 *
1322 * Returns 0 if all bios have been submitted,
1323 * -ENOMEM if we could not allocate enough bios,
1324 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1325 * single page to an empty bio (which should never happen and likely indicates
1326 * that the lower level IO stack is in some way broken). This has been observed
1327 * on certain Xen deployments.
45bb912b
LE
1328 */
1329/* TODO allocate from our own bio_set. */
b30ab791 1330int drbd_submit_peer_request(struct drbd_device *device,
fbe29dec
AG
1331 struct drbd_peer_request *peer_req,
1332 const unsigned rw, const int fault_type)
45bb912b
LE
1333{
1334 struct bio *bios = NULL;
1335 struct bio *bio;
db830c46
AG
1336 struct page *page = peer_req->pages;
1337 sector_t sector = peer_req->i.sector;
1338 unsigned ds = peer_req->i.size;
45bb912b
LE
1339 unsigned n_bios = 0;
1340 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
10f6d992 1341 int err = -ENOMEM;
45bb912b 1342
a0fb3c47
LE
1343 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1344 /* wait for all pending IO completions, before we start
1345 * zeroing things out. */
1346 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1347 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1348 sector, ds >> 9, GFP_NOIO))
1349 peer_req->flags |= EE_WAS_ERROR;
1350 drbd_endio_write_sec_final(peer_req);
1351 return 0;
1352 }
1353
54ed4ed8
LE
1354 /* Discards don't have any payload.
1355 * But the scsi layer still expects a bio_vec it can use internally,
1356 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
a0fb3c47 1357 if (peer_req->flags & EE_IS_TRIM)
54ed4ed8 1358 nr_pages = 1;
a0fb3c47 1359
45bb912b
LE
1360 /* In most cases, we will only need one bio. But in case the lower
1361 * level restrictions happen to be different at this offset on this
1362 * side than those of the sending peer, we may need to submit the
9476f39d
LE
1363 * request in more than one bio.
1364 *
1365 * Plain bio_alloc is good enough here, this is no DRBD internally
1366 * generated bio, but a bio allocated on behalf of the peer.
1367 */
45bb912b
LE
1368next_bio:
1369 bio = bio_alloc(GFP_NOIO, nr_pages);
1370 if (!bio) {
a0fb3c47 1371 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
45bb912b
LE
1372 goto fail;
1373 }
db830c46 1374 /* > peer_req->i.sector, unless this is the first bio */
4f024f37 1375 bio->bi_iter.bi_sector = sector;
b30ab791 1376 bio->bi_bdev = device->ldev->backing_bdev;
45bb912b 1377 bio->bi_rw = rw;
db830c46 1378 bio->bi_private = peer_req;
fcefa62e 1379 bio->bi_end_io = drbd_peer_request_endio;
45bb912b
LE
1380
1381 bio->bi_next = bios;
1382 bios = bio;
1383 ++n_bios;
1384
a0fb3c47
LE
1385 if (rw & REQ_DISCARD) {
1386 bio->bi_iter.bi_size = ds;
1387 goto submit;
1388 }
1389
45bb912b
LE
1390 page_chain_for_each(page) {
1391 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1392 if (!bio_add_page(bio, page, len, 0)) {
10f6d992
LE
1393 /* A single page must always be possible!
1394 * But in case it fails anyways,
1395 * we deal with it, and complain (below). */
1396 if (bio->bi_vcnt == 0) {
d0180171 1397 drbd_err(device,
10f6d992
LE
1398 "bio_add_page failed for len=%u, "
1399 "bi_vcnt=0 (bi_sector=%llu)\n",
4f024f37 1400 len, (uint64_t)bio->bi_iter.bi_sector);
10f6d992
LE
1401 err = -ENOSPC;
1402 goto fail;
1403 }
45bb912b
LE
1404 goto next_bio;
1405 }
1406 ds -= len;
1407 sector += len >> 9;
1408 --nr_pages;
1409 }
0b0ba1ef 1410 D_ASSERT(device, ds == 0);
a0fb3c47
LE
1411submit:
1412 D_ASSERT(device, page == NULL);
45bb912b 1413
db830c46 1414 atomic_set(&peer_req->pending_bios, n_bios);
45bb912b
LE
1415 do {
1416 bio = bios;
1417 bios = bios->bi_next;
1418 bio->bi_next = NULL;
1419
b30ab791 1420 drbd_generic_make_request(device, fault_type, bio);
45bb912b 1421 } while (bios);
45bb912b
LE
1422 return 0;
1423
1424fail:
1425 while (bios) {
1426 bio = bios;
1427 bios = bios->bi_next;
1428 bio_put(bio);
1429 }
10f6d992 1430 return err;
45bb912b
LE
1431}
1432
b30ab791 1433static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
db830c46 1434 struct drbd_peer_request *peer_req)
53840641 1435{
db830c46 1436 struct drbd_interval *i = &peer_req->i;
53840641 1437
b30ab791 1438 drbd_remove_interval(&device->write_requests, i);
53840641
AG
1439 drbd_clear_interval(i);
1440
6c852bec 1441 /* Wake up any processes waiting for this peer request to complete. */
53840641 1442 if (i->waiting)
b30ab791 1443 wake_up(&device->misc_wait);
53840641
AG
1444}
1445
bde89a9e 1446static void conn_wait_active_ee_empty(struct drbd_connection *connection)
77fede51 1447{
c06ece6b 1448 struct drbd_peer_device *peer_device;
77fede51
PR
1449 int vnr;
1450
1451 rcu_read_lock();
c06ece6b
AG
1452 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1453 struct drbd_device *device = peer_device->device;
1454
b30ab791 1455 kref_get(&device->kref);
77fede51 1456 rcu_read_unlock();
b30ab791 1457 drbd_wait_ee_list_empty(device, &device->active_ee);
05a10ec7 1458 kref_put(&device->kref, drbd_destroy_device);
77fede51
PR
1459 rcu_read_lock();
1460 }
1461 rcu_read_unlock();
1462}
1463
9f4fe9ad
AG
1464static struct drbd_peer_device *
1465conn_peer_device(struct drbd_connection *connection, int volume_number)
1466{
1467 return idr_find(&connection->peer_devices, volume_number);
1468}
1469
bde89a9e 1470static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1471{
2451fc3b 1472 int rv;
e658983a 1473 struct p_barrier *p = pi->data;
b411b363
PR
1474 struct drbd_epoch *epoch;
1475
9ed57dcb
LE
1476 /* FIXME these are unacked on connection,
1477 * not a specific (peer)device.
1478 */
bde89a9e
AG
1479 connection->current_epoch->barrier_nr = p->barrier;
1480 connection->current_epoch->connection = connection;
1481 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
b411b363
PR
1482
1483 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1484 * the activity log, which means it would not be resynced in case the
1485 * R_PRIMARY crashes now.
1486 * Therefore we must send the barrier_ack after the barrier request was
1487 * completed. */
e9526580 1488 switch (connection->resource->write_ordering) {
b411b363
PR
1489 case WO_none:
1490 if (rv == FE_RECYCLED)
82bc0194 1491 return 0;
2451fc3b
PR
1492
1493 /* receiver context, in the writeout path of the other node.
1494 * avoid potential distributed deadlock */
1495 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1496 if (epoch)
1497 break;
1498 else
1ec861eb 1499 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
2451fc3b 1500 /* Fall through */
b411b363
PR
1501
1502 case WO_bdev_flush:
1503 case WO_drain_io:
bde89a9e
AG
1504 conn_wait_active_ee_empty(connection);
1505 drbd_flush(connection);
2451fc3b 1506
bde89a9e 1507 if (atomic_read(&connection->current_epoch->epoch_size)) {
2451fc3b
PR
1508 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1509 if (epoch)
1510 break;
b411b363
PR
1511 }
1512
82bc0194 1513 return 0;
2451fc3b 1514 default:
e9526580
PR
1515 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1516 connection->resource->write_ordering);
82bc0194 1517 return -EIO;
b411b363
PR
1518 }
1519
1520 epoch->flags = 0;
1521 atomic_set(&epoch->epoch_size, 0);
1522 atomic_set(&epoch->active, 0);
1523
bde89a9e
AG
1524 spin_lock(&connection->epoch_lock);
1525 if (atomic_read(&connection->current_epoch->epoch_size)) {
1526 list_add(&epoch->list, &connection->current_epoch->list);
1527 connection->current_epoch = epoch;
1528 connection->epochs++;
b411b363
PR
1529 } else {
1530 /* The current_epoch got recycled while we allocated this one... */
1531 kfree(epoch);
1532 }
bde89a9e 1533 spin_unlock(&connection->epoch_lock);
b411b363 1534
82bc0194 1535 return 0;
b411b363
PR
1536}
1537
1538/* used from receive_RSDataReply (recv_resync_read)
1539 * and from receive_Data */
f6ffca9f 1540static struct drbd_peer_request *
69a22773 1541read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
a0fb3c47 1542 struct packet_info *pi) __must_hold(local)
b411b363 1543{
69a22773 1544 struct drbd_device *device = peer_device->device;
b30ab791 1545 const sector_t capacity = drbd_get_capacity(device->this_bdev);
db830c46 1546 struct drbd_peer_request *peer_req;
b411b363 1547 struct page *page;
a5c31904 1548 int dgs, ds, err;
a0fb3c47 1549 int data_size = pi->size;
69a22773
AG
1550 void *dig_in = peer_device->connection->int_dig_in;
1551 void *dig_vv = peer_device->connection->int_dig_vv;
6b4388ac 1552 unsigned long *data;
a0fb3c47 1553 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
b411b363 1554
88104ca4 1555 dgs = 0;
a0fb3c47 1556 if (!trim && peer_device->connection->peer_integrity_tfm) {
69a22773 1557 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
9f5bdc33
AG
1558 /*
1559 * FIXME: Receive the incoming digest into the receive buffer
1560 * here, together with its struct p_data?
1561 */
69a22773 1562 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
a5c31904 1563 if (err)
b411b363 1564 return NULL;
88104ca4 1565 data_size -= dgs;
b411b363
PR
1566 }
1567
a0fb3c47
LE
1568 if (trim) {
1569 D_ASSERT(peer_device, data_size == 0);
1570 data_size = be32_to_cpu(trim->size);
1571 }
1572
841ce241
AG
1573 if (!expect(IS_ALIGNED(data_size, 512)))
1574 return NULL;
a0fb3c47
LE
1575 /* prepare for larger trim requests. */
1576 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
841ce241 1577 return NULL;
b411b363 1578
6666032a
LE
1579 /* even though we trust out peer,
1580 * we sometimes have to double check. */
1581 if (sector + (data_size>>9) > capacity) {
d0180171 1582 drbd_err(device, "request from peer beyond end of local disk: "
fdda6544 1583 "capacity: %llus < sector: %llus + size: %u\n",
6666032a
LE
1584 (unsigned long long)capacity,
1585 (unsigned long long)sector, data_size);
1586 return NULL;
1587 }
1588
b411b363
PR
1589 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1590 * "criss-cross" setup, that might cause write-out on some other DRBD,
1591 * which in turn might block on the other node at this very place. */
a0fb3c47 1592 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
db830c46 1593 if (!peer_req)
b411b363 1594 return NULL;
45bb912b 1595
a0fb3c47 1596 if (trim)
81a3537a 1597 return peer_req;
a73ff323 1598
b411b363 1599 ds = data_size;
db830c46 1600 page = peer_req->pages;
45bb912b
LE
1601 page_chain_for_each(page) {
1602 unsigned len = min_t(int, ds, PAGE_SIZE);
6b4388ac 1603 data = kmap(page);
69a22773 1604 err = drbd_recv_all_warn(peer_device->connection, data, len);
b30ab791 1605 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
d0180171 1606 drbd_err(device, "Fault injection: Corrupting data on receive\n");
6b4388ac
PR
1607 data[0] = data[0] ^ (unsigned long)-1;
1608 }
b411b363 1609 kunmap(page);
a5c31904 1610 if (err) {
b30ab791 1611 drbd_free_peer_req(device, peer_req);
b411b363
PR
1612 return NULL;
1613 }
a5c31904 1614 ds -= len;
b411b363
PR
1615 }
1616
1617 if (dgs) {
69a22773 1618 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
b411b363 1619 if (memcmp(dig_in, dig_vv, dgs)) {
d0180171 1620 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
470be44a 1621 (unsigned long long)sector, data_size);
b30ab791 1622 drbd_free_peer_req(device, peer_req);
b411b363
PR
1623 return NULL;
1624 }
1625 }
b30ab791 1626 device->recv_cnt += data_size>>9;
db830c46 1627 return peer_req;
b411b363
PR
1628}
1629
1630/* drbd_drain_block() just takes a data block
1631 * out of the socket input buffer, and discards it.
1632 */
69a22773 1633static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
b411b363
PR
1634{
1635 struct page *page;
a5c31904 1636 int err = 0;
b411b363
PR
1637 void *data;
1638
c3470cde 1639 if (!data_size)
fc5be839 1640 return 0;
c3470cde 1641
69a22773 1642 page = drbd_alloc_pages(peer_device, 1, 1);
b411b363
PR
1643
1644 data = kmap(page);
1645 while (data_size) {
fc5be839
AG
1646 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1647
69a22773 1648 err = drbd_recv_all_warn(peer_device->connection, data, len);
a5c31904 1649 if (err)
b411b363 1650 break;
a5c31904 1651 data_size -= len;
b411b363
PR
1652 }
1653 kunmap(page);
69a22773 1654 drbd_free_pages(peer_device->device, page, 0);
fc5be839 1655 return err;
b411b363
PR
1656}
1657
69a22773 1658static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
b411b363
PR
1659 sector_t sector, int data_size)
1660{
7988613b
KO
1661 struct bio_vec bvec;
1662 struct bvec_iter iter;
b411b363 1663 struct bio *bio;
7988613b 1664 int dgs, err, expect;
69a22773
AG
1665 void *dig_in = peer_device->connection->int_dig_in;
1666 void *dig_vv = peer_device->connection->int_dig_vv;
b411b363 1667
88104ca4 1668 dgs = 0;
69a22773
AG
1669 if (peer_device->connection->peer_integrity_tfm) {
1670 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1671 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
a5c31904
AG
1672 if (err)
1673 return err;
88104ca4 1674 data_size -= dgs;
b411b363
PR
1675 }
1676
b411b363
PR
1677 /* optimistically update recv_cnt. if receiving fails below,
1678 * we disconnect anyways, and counters will be reset. */
69a22773 1679 peer_device->device->recv_cnt += data_size>>9;
b411b363
PR
1680
1681 bio = req->master_bio;
69a22773 1682 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
b411b363 1683
7988613b
KO
1684 bio_for_each_segment(bvec, bio, iter) {
1685 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1686 expect = min_t(int, data_size, bvec.bv_len);
69a22773 1687 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
7988613b 1688 kunmap(bvec.bv_page);
a5c31904
AG
1689 if (err)
1690 return err;
1691 data_size -= expect;
b411b363
PR
1692 }
1693
1694 if (dgs) {
69a22773 1695 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
b411b363 1696 if (memcmp(dig_in, dig_vv, dgs)) {
69a22773 1697 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
28284cef 1698 return -EINVAL;
b411b363
PR
1699 }
1700 }
1701
69a22773 1702 D_ASSERT(peer_device->device, data_size == 0);
28284cef 1703 return 0;
b411b363
PR
1704}
1705
a990be46
AG
1706/*
1707 * e_end_resync_block() is called in asender context via
1708 * drbd_finish_peer_reqs().
1709 */
99920dc5 1710static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1711{
8050e6d0 1712 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1713 container_of(w, struct drbd_peer_request, w);
1714 struct drbd_peer_device *peer_device = peer_req->peer_device;
1715 struct drbd_device *device = peer_device->device;
db830c46 1716 sector_t sector = peer_req->i.sector;
99920dc5 1717 int err;
b411b363 1718
0b0ba1ef 1719 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1720
db830c46 1721 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791 1722 drbd_set_in_sync(device, sector, peer_req->i.size);
a8cd15ba 1723 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
b411b363
PR
1724 } else {
1725 /* Record failure to sync */
b30ab791 1726 drbd_rs_failed_io(device, sector, peer_req->i.size);
b411b363 1727
a8cd15ba 1728 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363 1729 }
b30ab791 1730 dec_unacked(device);
b411b363 1731
99920dc5 1732 return err;
b411b363
PR
1733}
1734
69a22773 1735static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
a0fb3c47 1736 struct packet_info *pi) __releases(local)
b411b363 1737{
69a22773 1738 struct drbd_device *device = peer_device->device;
db830c46 1739 struct drbd_peer_request *peer_req;
b411b363 1740
a0fb3c47 1741 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
db830c46 1742 if (!peer_req)
45bb912b 1743 goto fail;
b411b363 1744
b30ab791 1745 dec_rs_pending(device);
b411b363 1746
b30ab791 1747 inc_unacked(device);
b411b363
PR
1748 /* corresponding dec_unacked() in e_end_resync_block()
1749 * respective _drbd_clear_done_ee */
1750
a8cd15ba 1751 peer_req->w.cb = e_end_resync_block;
45bb912b 1752
0500813f 1753 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1754 list_add(&peer_req->w.list, &device->sync_ee);
0500813f 1755 spin_unlock_irq(&device->resource->req_lock);
b411b363 1756
a0fb3c47 1757 atomic_add(pi->size >> 9, &device->rs_sect_ev);
b30ab791 1758 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
e1c1b0fc 1759 return 0;
b411b363 1760
10f6d992 1761 /* don't care for the reason here */
d0180171 1762 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 1763 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1764 list_del(&peer_req->w.list);
0500813f 1765 spin_unlock_irq(&device->resource->req_lock);
22cc37a9 1766
b30ab791 1767 drbd_free_peer_req(device, peer_req);
45bb912b 1768fail:
b30ab791 1769 put_ldev(device);
e1c1b0fc 1770 return -EIO;
b411b363
PR
1771}
1772
668eebc6 1773static struct drbd_request *
b30ab791 1774find_request(struct drbd_device *device, struct rb_root *root, u64 id,
bc9c5c41 1775 sector_t sector, bool missing_ok, const char *func)
51624585 1776{
51624585
AG
1777 struct drbd_request *req;
1778
bc9c5c41
AG
1779 /* Request object according to our peer */
1780 req = (struct drbd_request *)(unsigned long)id;
5e472264 1781 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1782 return req;
c3afd8f5 1783 if (!missing_ok) {
d0180171 1784 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
c3afd8f5
AG
1785 (unsigned long)id, (unsigned long long)sector);
1786 }
51624585 1787 return NULL;
b411b363
PR
1788}
1789
bde89a9e 1790static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1791{
9f4fe9ad 1792 struct drbd_peer_device *peer_device;
b30ab791 1793 struct drbd_device *device;
b411b363
PR
1794 struct drbd_request *req;
1795 sector_t sector;
82bc0194 1796 int err;
e658983a 1797 struct p_data *p = pi->data;
4a76b161 1798
9f4fe9ad
AG
1799 peer_device = conn_peer_device(connection, pi->vnr);
1800 if (!peer_device)
4a76b161 1801 return -EIO;
9f4fe9ad 1802 device = peer_device->device;
b411b363
PR
1803
1804 sector = be64_to_cpu(p->sector);
1805
0500813f 1806 spin_lock_irq(&device->resource->req_lock);
b30ab791 1807 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
0500813f 1808 spin_unlock_irq(&device->resource->req_lock);
c3afd8f5 1809 if (unlikely(!req))
82bc0194 1810 return -EIO;
b411b363 1811
24c4830c 1812 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
b411b363
PR
1813 * special casing it there for the various failure cases.
1814 * still no race with drbd_fail_pending_reads */
69a22773 1815 err = recv_dless_read(peer_device, req, sector, pi->size);
82bc0194 1816 if (!err)
8554df1c 1817 req_mod(req, DATA_RECEIVED);
b411b363
PR
1818 /* else: nothing. handled from drbd_disconnect...
1819 * I don't think we may complete this just yet
1820 * in case we are "on-disconnect: freeze" */
1821
82bc0194 1822 return err;
b411b363
PR
1823}
1824
bde89a9e 1825static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1826{
9f4fe9ad 1827 struct drbd_peer_device *peer_device;
b30ab791 1828 struct drbd_device *device;
b411b363 1829 sector_t sector;
82bc0194 1830 int err;
e658983a 1831 struct p_data *p = pi->data;
4a76b161 1832
9f4fe9ad
AG
1833 peer_device = conn_peer_device(connection, pi->vnr);
1834 if (!peer_device)
4a76b161 1835 return -EIO;
9f4fe9ad 1836 device = peer_device->device;
b411b363
PR
1837
1838 sector = be64_to_cpu(p->sector);
0b0ba1ef 1839 D_ASSERT(device, p->block_id == ID_SYNCER);
b411b363 1840
b30ab791 1841 if (get_ldev(device)) {
b411b363
PR
1842 /* data is submitted to disk within recv_resync_read.
1843 * corresponding put_ldev done below on error,
fcefa62e 1844 * or in drbd_peer_request_endio. */
a0fb3c47 1845 err = recv_resync_read(peer_device, sector, pi);
b411b363
PR
1846 } else {
1847 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1848 drbd_err(device, "Can not write resync data to local disk.\n");
b411b363 1849
69a22773 1850 err = drbd_drain_block(peer_device, pi->size);
b411b363 1851
69a22773 1852 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
b411b363
PR
1853 }
1854
b30ab791 1855 atomic_add(pi->size >> 9, &device->rs_sect_in);
778f271d 1856
82bc0194 1857 return err;
b411b363
PR
1858}
1859
b30ab791 1860static void restart_conflicting_writes(struct drbd_device *device,
7be8da07 1861 sector_t sector, int size)
b411b363 1862{
7be8da07
AG
1863 struct drbd_interval *i;
1864 struct drbd_request *req;
1865
b30ab791 1866 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
1867 if (!i->local)
1868 continue;
1869 req = container_of(i, struct drbd_request, i);
1870 if (req->rq_state & RQ_LOCAL_PENDING ||
1871 !(req->rq_state & RQ_POSTPONED))
1872 continue;
2312f0b3
LE
1873 /* as it is RQ_POSTPONED, this will cause it to
1874 * be queued on the retry workqueue. */
d4dabbe2 1875 __req_mod(req, CONFLICT_RESOLVED, NULL);
7be8da07
AG
1876 }
1877}
b411b363 1878
a990be46
AG
1879/*
1880 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1881 */
99920dc5 1882static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1883{
8050e6d0 1884 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1885 container_of(w, struct drbd_peer_request, w);
1886 struct drbd_peer_device *peer_device = peer_req->peer_device;
1887 struct drbd_device *device = peer_device->device;
db830c46 1888 sector_t sector = peer_req->i.sector;
99920dc5 1889 int err = 0, pcmd;
b411b363 1890
303d1448 1891 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1892 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791
AG
1893 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1894 device->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1895 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1896 P_RS_WRITE_ACK : P_WRITE_ACK;
a8cd15ba 1897 err = drbd_send_ack(peer_device, pcmd, peer_req);
b411b363 1898 if (pcmd == P_RS_WRITE_ACK)
b30ab791 1899 drbd_set_in_sync(device, sector, peer_req->i.size);
b411b363 1900 } else {
a8cd15ba 1901 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363
PR
1902 /* we expect it to be marked out of sync anyways...
1903 * maybe assert this? */
1904 }
b30ab791 1905 dec_unacked(device);
b411b363
PR
1906 }
1907 /* we delete from the conflict detection hash _after_ we sent out the
1908 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1909 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
0500813f 1910 spin_lock_irq(&device->resource->req_lock);
0b0ba1ef 1911 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
b30ab791 1912 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07 1913 if (peer_req->flags & EE_RESTART_REQUESTS)
b30ab791 1914 restart_conflicting_writes(device, sector, peer_req->i.size);
0500813f 1915 spin_unlock_irq(&device->resource->req_lock);
bb3bfe96 1916 } else
0b0ba1ef 1917 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1918
a6b32bc3 1919 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1920
99920dc5 1921 return err;
b411b363
PR
1922}
1923
a8cd15ba 1924static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1925{
8050e6d0 1926 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1927 container_of(w, struct drbd_peer_request, w);
1928 struct drbd_peer_device *peer_device = peer_req->peer_device;
99920dc5 1929 int err;
b411b363 1930
a8cd15ba
AG
1931 err = drbd_send_ack(peer_device, ack, peer_req);
1932 dec_unacked(peer_device->device);
b411b363 1933
99920dc5 1934 return err;
b411b363
PR
1935}
1936
d4dabbe2 1937static int e_send_superseded(struct drbd_work *w, int unused)
7be8da07 1938{
a8cd15ba 1939 return e_send_ack(w, P_SUPERSEDED);
7be8da07
AG
1940}
1941
99920dc5 1942static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07 1943{
a8cd15ba
AG
1944 struct drbd_peer_request *peer_req =
1945 container_of(w, struct drbd_peer_request, w);
1946 struct drbd_connection *connection = peer_req->peer_device->connection;
7be8da07 1947
a8cd15ba 1948 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
d4dabbe2 1949 P_RETRY_WRITE : P_SUPERSEDED);
7be8da07 1950}
b411b363 1951
3e394da1
AG
1952static bool seq_greater(u32 a, u32 b)
1953{
1954 /*
1955 * We assume 32-bit wrap-around here.
1956 * For 24-bit wrap-around, we would have to shift:
1957 * a <<= 8; b <<= 8;
1958 */
1959 return (s32)a - (s32)b > 0;
1960}
b411b363 1961
3e394da1
AG
1962static u32 seq_max(u32 a, u32 b)
1963{
1964 return seq_greater(a, b) ? a : b;
b411b363
PR
1965}
1966
69a22773 1967static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
3e394da1 1968{
69a22773 1969 struct drbd_device *device = peer_device->device;
3c13b680 1970 unsigned int newest_peer_seq;
3e394da1 1971
69a22773 1972 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
b30ab791
AG
1973 spin_lock(&device->peer_seq_lock);
1974 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1975 device->peer_seq = newest_peer_seq;
1976 spin_unlock(&device->peer_seq_lock);
1977 /* wake up only if we actually changed device->peer_seq */
3c13b680 1978 if (peer_seq == newest_peer_seq)
b30ab791 1979 wake_up(&device->seq_wait);
7be8da07 1980 }
b411b363
PR
1981}
1982
d93f6302 1983static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
b6a370ba 1984{
d93f6302
LE
1985 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1986}
b6a370ba 1987
d93f6302 1988/* maybe change sync_ee into interval trees as well? */
b30ab791 1989static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
d93f6302
LE
1990{
1991 struct drbd_peer_request *rs_req;
b6a370ba
PR
1992 bool rv = 0;
1993
0500813f 1994 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1995 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
d93f6302
LE
1996 if (overlaps(peer_req->i.sector, peer_req->i.size,
1997 rs_req->i.sector, rs_req->i.size)) {
b6a370ba
PR
1998 rv = 1;
1999 break;
2000 }
2001 }
0500813f 2002 spin_unlock_irq(&device->resource->req_lock);
b6a370ba
PR
2003
2004 return rv;
2005}
2006
b411b363
PR
2007/* Called from receive_Data.
2008 * Synchronize packets on sock with packets on msock.
2009 *
2010 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2011 * packet traveling on msock, they are still processed in the order they have
2012 * been sent.
2013 *
2014 * Note: we don't care for Ack packets overtaking P_DATA packets.
2015 *
b30ab791 2016 * In case packet_seq is larger than device->peer_seq number, there are
b411b363 2017 * outstanding packets on the msock. We wait for them to arrive.
b30ab791 2018 * In case we are the logically next packet, we update device->peer_seq
b411b363
PR
2019 * ourselves. Correctly handles 32bit wrap around.
2020 *
2021 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2022 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2023 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2024 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2025 *
2026 * returns 0 if we may process the packet,
2027 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
69a22773 2028static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
b411b363 2029{
69a22773 2030 struct drbd_device *device = peer_device->device;
b411b363 2031 DEFINE_WAIT(wait);
b411b363 2032 long timeout;
b874d231 2033 int ret = 0, tp;
7be8da07 2034
69a22773 2035 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
7be8da07
AG
2036 return 0;
2037
b30ab791 2038 spin_lock(&device->peer_seq_lock);
b411b363 2039 for (;;) {
b30ab791
AG
2040 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2041 device->peer_seq = seq_max(device->peer_seq, peer_seq);
b411b363 2042 break;
7be8da07 2043 }
b874d231 2044
b411b363
PR
2045 if (signal_pending(current)) {
2046 ret = -ERESTARTSYS;
2047 break;
2048 }
b874d231
PR
2049
2050 rcu_read_lock();
a6b32bc3 2051 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
b874d231
PR
2052 rcu_read_unlock();
2053
2054 if (!tp)
2055 break;
2056
2057 /* Only need to wait if two_primaries is enabled */
b30ab791
AG
2058 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2059 spin_unlock(&device->peer_seq_lock);
44ed167d 2060 rcu_read_lock();
69a22773 2061 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
44ed167d 2062 rcu_read_unlock();
71b1c1eb 2063 timeout = schedule_timeout(timeout);
b30ab791 2064 spin_lock(&device->peer_seq_lock);
7be8da07 2065 if (!timeout) {
b411b363 2066 ret = -ETIMEDOUT;
d0180171 2067 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
b411b363
PR
2068 break;
2069 }
2070 }
b30ab791
AG
2071 spin_unlock(&device->peer_seq_lock);
2072 finish_wait(&device->seq_wait, &wait);
b411b363
PR
2073 return ret;
2074}
2075
688593c5
LE
2076/* see also bio_flags_to_wire()
2077 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2078 * flags and back. We may replicate to other kernel versions. */
81f0ffd2 2079static unsigned long wire_flags_to_bio(u32 dpf)
76d2e7ec 2080{
688593c5
LE
2081 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2082 (dpf & DP_FUA ? REQ_FUA : 0) |
2083 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2084 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
2085}
2086
b30ab791 2087static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
7be8da07
AG
2088 unsigned int size)
2089{
2090 struct drbd_interval *i;
2091
2092 repeat:
b30ab791 2093 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2094 struct drbd_request *req;
2095 struct bio_and_error m;
2096
2097 if (!i->local)
2098 continue;
2099 req = container_of(i, struct drbd_request, i);
2100 if (!(req->rq_state & RQ_POSTPONED))
2101 continue;
2102 req->rq_state &= ~RQ_POSTPONED;
2103 __req_mod(req, NEG_ACKED, &m);
0500813f 2104 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2105 if (m.bio)
b30ab791 2106 complete_master_bio(device, &m);
0500813f 2107 spin_lock_irq(&device->resource->req_lock);
7be8da07
AG
2108 goto repeat;
2109 }
2110}
2111
b30ab791 2112static int handle_write_conflicts(struct drbd_device *device,
7be8da07
AG
2113 struct drbd_peer_request *peer_req)
2114{
e33b32de 2115 struct drbd_connection *connection = peer_req->peer_device->connection;
bde89a9e 2116 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
7be8da07
AG
2117 sector_t sector = peer_req->i.sector;
2118 const unsigned int size = peer_req->i.size;
2119 struct drbd_interval *i;
2120 bool equal;
2121 int err;
2122
2123 /*
2124 * Inserting the peer request into the write_requests tree will prevent
2125 * new conflicting local requests from being added.
2126 */
b30ab791 2127 drbd_insert_interval(&device->write_requests, &peer_req->i);
7be8da07
AG
2128
2129 repeat:
b30ab791 2130 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2131 if (i == &peer_req->i)
2132 continue;
2133
2134 if (!i->local) {
2135 /*
2136 * Our peer has sent a conflicting remote request; this
2137 * should not happen in a two-node setup. Wait for the
2138 * earlier peer request to complete.
2139 */
b30ab791 2140 err = drbd_wait_misc(device, i);
7be8da07
AG
2141 if (err)
2142 goto out;
2143 goto repeat;
2144 }
2145
2146 equal = i->sector == sector && i->size == size;
2147 if (resolve_conflicts) {
2148 /*
2149 * If the peer request is fully contained within the
d4dabbe2
LE
2150 * overlapping request, it can be considered overwritten
2151 * and thus superseded; otherwise, it will be retried
2152 * once all overlapping requests have completed.
7be8da07 2153 */
d4dabbe2 2154 bool superseded = i->sector <= sector && i->sector +
7be8da07
AG
2155 (i->size >> 9) >= sector + (size >> 9);
2156
2157 if (!equal)
d0180171 2158 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2159 "local=%llus +%u, remote=%llus +%u, "
2160 "assuming %s came first\n",
2161 (unsigned long long)i->sector, i->size,
2162 (unsigned long long)sector, size,
d4dabbe2 2163 superseded ? "local" : "remote");
7be8da07 2164
b30ab791 2165 inc_unacked(device);
a8cd15ba 2166 peer_req->w.cb = superseded ? e_send_superseded :
7be8da07 2167 e_send_retry_write;
a8cd15ba 2168 list_add_tail(&peer_req->w.list, &device->done_ee);
e33b32de 2169 wake_asender(connection);
7be8da07
AG
2170
2171 err = -ENOENT;
2172 goto out;
2173 } else {
2174 struct drbd_request *req =
2175 container_of(i, struct drbd_request, i);
2176
2177 if (!equal)
d0180171 2178 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2179 "local=%llus +%u, remote=%llus +%u\n",
2180 (unsigned long long)i->sector, i->size,
2181 (unsigned long long)sector, size);
2182
2183 if (req->rq_state & RQ_LOCAL_PENDING ||
2184 !(req->rq_state & RQ_POSTPONED)) {
2185 /*
2186 * Wait for the node with the discard flag to
d4dabbe2
LE
2187 * decide if this request has been superseded
2188 * or needs to be retried.
2189 * Requests that have been superseded will
7be8da07
AG
2190 * disappear from the write_requests tree.
2191 *
2192 * In addition, wait for the conflicting
2193 * request to finish locally before submitting
2194 * the conflicting peer request.
2195 */
b30ab791 2196 err = drbd_wait_misc(device, &req->i);
7be8da07 2197 if (err) {
e33b32de 2198 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
b30ab791 2199 fail_postponed_requests(device, sector, size);
7be8da07
AG
2200 goto out;
2201 }
2202 goto repeat;
2203 }
2204 /*
2205 * Remember to restart the conflicting requests after
2206 * the new peer request has completed.
2207 */
2208 peer_req->flags |= EE_RESTART_REQUESTS;
2209 }
2210 }
2211 err = 0;
2212
2213 out:
2214 if (err)
b30ab791 2215 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07
AG
2216 return err;
2217}
2218
b411b363 2219/* mirrored write */
bde89a9e 2220static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2221{
9f4fe9ad 2222 struct drbd_peer_device *peer_device;
b30ab791 2223 struct drbd_device *device;
b411b363 2224 sector_t sector;
db830c46 2225 struct drbd_peer_request *peer_req;
e658983a 2226 struct p_data *p = pi->data;
7be8da07 2227 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
2228 int rw = WRITE;
2229 u32 dp_flags;
302bdeae 2230 int err, tp;
b411b363 2231
9f4fe9ad
AG
2232 peer_device = conn_peer_device(connection, pi->vnr);
2233 if (!peer_device)
4a76b161 2234 return -EIO;
9f4fe9ad 2235 device = peer_device->device;
b411b363 2236
b30ab791 2237 if (!get_ldev(device)) {
82bc0194
AG
2238 int err2;
2239
69a22773
AG
2240 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2241 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
bde89a9e 2242 atomic_inc(&connection->current_epoch->epoch_size);
69a22773 2243 err2 = drbd_drain_block(peer_device, pi->size);
82bc0194
AG
2244 if (!err)
2245 err = err2;
2246 return err;
b411b363
PR
2247 }
2248
fcefa62e
AG
2249 /*
2250 * Corresponding put_ldev done either below (on various errors), or in
2251 * drbd_peer_request_endio, if we successfully submit the data at the
2252 * end of this function.
2253 */
b411b363
PR
2254
2255 sector = be64_to_cpu(p->sector);
a0fb3c47 2256 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
db830c46 2257 if (!peer_req) {
b30ab791 2258 put_ldev(device);
82bc0194 2259 return -EIO;
b411b363
PR
2260 }
2261
a8cd15ba 2262 peer_req->w.cb = e_end_block;
b411b363 2263
688593c5 2264 dp_flags = be32_to_cpu(p->dp_flags);
81f0ffd2 2265 rw |= wire_flags_to_bio(dp_flags);
a0fb3c47
LE
2266 if (pi->cmd == P_TRIM) {
2267 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2268 peer_req->flags |= EE_IS_TRIM;
2269 if (!blk_queue_discard(q))
2270 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2271 D_ASSERT(peer_device, peer_req->i.size > 0);
2272 D_ASSERT(peer_device, rw & REQ_DISCARD);
2273 D_ASSERT(peer_device, peer_req->pages == NULL);
2274 } else if (peer_req->pages == NULL) {
0b0ba1ef
AG
2275 D_ASSERT(device, peer_req->i.size == 0);
2276 D_ASSERT(device, dp_flags & DP_FLUSH);
a73ff323 2277 }
688593c5
LE
2278
2279 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2280 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2281
bde89a9e
AG
2282 spin_lock(&connection->epoch_lock);
2283 peer_req->epoch = connection->current_epoch;
db830c46
AG
2284 atomic_inc(&peer_req->epoch->epoch_size);
2285 atomic_inc(&peer_req->epoch->active);
bde89a9e 2286 spin_unlock(&connection->epoch_lock);
b411b363 2287
302bdeae 2288 rcu_read_lock();
9f4fe9ad 2289 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
302bdeae
PR
2290 rcu_read_unlock();
2291 if (tp) {
2292 peer_req->flags |= EE_IN_INTERVAL_TREE;
69a22773 2293 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
7be8da07 2294 if (err)
b411b363 2295 goto out_interrupted;
0500813f 2296 spin_lock_irq(&device->resource->req_lock);
b30ab791 2297 err = handle_write_conflicts(device, peer_req);
7be8da07 2298 if (err) {
0500813f 2299 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2300 if (err == -ENOENT) {
b30ab791 2301 put_ldev(device);
82bc0194 2302 return 0;
b411b363 2303 }
7be8da07 2304 goto out_interrupted;
b411b363 2305 }
b874d231 2306 } else {
69a22773 2307 update_peer_seq(peer_device, peer_seq);
0500813f 2308 spin_lock_irq(&device->resource->req_lock);
b874d231 2309 }
a0fb3c47
LE
2310 /* if we use the zeroout fallback code, we process synchronously
2311 * and we wait for all pending requests, respectively wait for
2312 * active_ee to become empty in drbd_submit_peer_request();
2313 * better not add ourselves here. */
2314 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2315 list_add(&peer_req->w.list, &device->active_ee);
0500813f 2316 spin_unlock_irq(&device->resource->req_lock);
b411b363 2317
b30ab791
AG
2318 if (device->state.conn == C_SYNC_TARGET)
2319 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
b411b363 2320
9f4fe9ad 2321 if (peer_device->connection->agreed_pro_version < 100) {
44ed167d 2322 rcu_read_lock();
9f4fe9ad 2323 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
303d1448
PR
2324 case DRBD_PROT_C:
2325 dp_flags |= DP_SEND_WRITE_ACK;
2326 break;
2327 case DRBD_PROT_B:
2328 dp_flags |= DP_SEND_RECEIVE_ACK;
2329 break;
b411b363 2330 }
44ed167d 2331 rcu_read_unlock();
b411b363
PR
2332 }
2333
303d1448
PR
2334 if (dp_flags & DP_SEND_WRITE_ACK) {
2335 peer_req->flags |= EE_SEND_WRITE_ACK;
b30ab791 2336 inc_unacked(device);
b411b363
PR
2337 /* corresponding dec_unacked() in e_end_block()
2338 * respective _drbd_clear_done_ee */
303d1448
PR
2339 }
2340
2341 if (dp_flags & DP_SEND_RECEIVE_ACK) {
b411b363
PR
2342 /* I really don't like it that the receiver thread
2343 * sends on the msock, but anyways */
69a22773 2344 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
b411b363
PR
2345 }
2346
b30ab791 2347 if (device->state.pdsk < D_INCONSISTENT) {
b411b363 2348 /* In case we have the only disk of the cluster, */
b30ab791 2349 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
db830c46
AG
2350 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2351 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
b30ab791 2352 drbd_al_begin_io(device, &peer_req->i, true);
b411b363
PR
2353 }
2354
b30ab791 2355 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
82bc0194
AG
2356 if (!err)
2357 return 0;
b411b363 2358
10f6d992 2359 /* don't care for the reason here */
d0180171 2360 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2361 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2362 list_del(&peer_req->w.list);
b30ab791 2363 drbd_remove_epoch_entry_interval(device, peer_req);
0500813f 2364 spin_unlock_irq(&device->resource->req_lock);
db830c46 2365 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
b30ab791 2366 drbd_al_complete_io(device, &peer_req->i);
22cc37a9 2367
b411b363 2368out_interrupted:
bde89a9e 2369 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
b30ab791
AG
2370 put_ldev(device);
2371 drbd_free_peer_req(device, peer_req);
82bc0194 2372 return err;
b411b363
PR
2373}
2374
0f0601f4
LE
2375/* We may throttle resync, if the lower device seems to be busy,
2376 * and current sync rate is above c_min_rate.
2377 *
2378 * To decide whether or not the lower device is busy, we use a scheme similar
2379 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2380 * (more than 64 sectors) of activity we cannot account for with our own resync
2381 * activity, it obviously is "busy".
2382 *
2383 * The current sync rate used here uses only the most recent two step marks,
2384 * to have a short time average so we can react faster.
2385 */
e8299874 2386bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
0f0601f4 2387{
e3555d85 2388 struct lc_element *tmp;
e8299874 2389 bool throttle = true;
daeda1cc 2390
e8299874
LE
2391 if (!drbd_rs_c_min_rate_throttle(device))
2392 return false;
0f0601f4 2393
b30ab791
AG
2394 spin_lock_irq(&device->al_lock);
2395 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
e3555d85
PR
2396 if (tmp) {
2397 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
e8299874
LE
2398 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2399 throttle = false;
e3555d85
PR
2400 /* Do not slow down if app IO is already waiting for this extent */
2401 }
b30ab791 2402 spin_unlock_irq(&device->al_lock);
e3555d85 2403
e8299874
LE
2404 return throttle;
2405}
2406
2407bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2408{
2409 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
2410 unsigned long db, dt, dbdt;
2411 unsigned int c_min_rate;
2412 int curr_events;
2413
2414 rcu_read_lock();
2415 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2416 rcu_read_unlock();
2417
2418 /* feature disabled? */
2419 if (c_min_rate == 0)
2420 return false;
2421
0f0601f4
LE
2422 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2423 (int)part_stat_read(&disk->part0, sectors[1]) -
b30ab791 2424 atomic_read(&device->rs_sect_ev);
b30ab791 2425 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
0f0601f4
LE
2426 unsigned long rs_left;
2427 int i;
2428
b30ab791 2429 device->rs_last_events = curr_events;
0f0601f4
LE
2430
2431 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2432 * approx. */
b30ab791 2433 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2649f080 2434
b30ab791
AG
2435 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2436 rs_left = device->ov_left;
2649f080 2437 else
b30ab791 2438 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
0f0601f4 2439
b30ab791 2440 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
0f0601f4
LE
2441 if (!dt)
2442 dt++;
b30ab791 2443 db = device->rs_mark_left[i] - rs_left;
0f0601f4
LE
2444 dbdt = Bit2KB(db/dt);
2445
daeda1cc 2446 if (dbdt > c_min_rate)
e8299874 2447 return true;
0f0601f4 2448 }
e8299874 2449 return false;
0f0601f4
LE
2450}
2451
bde89a9e 2452static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2453{
9f4fe9ad 2454 struct drbd_peer_device *peer_device;
b30ab791 2455 struct drbd_device *device;
b411b363 2456 sector_t sector;
4a76b161 2457 sector_t capacity;
db830c46 2458 struct drbd_peer_request *peer_req;
b411b363 2459 struct digest_info *di = NULL;
b18b37be 2460 int size, verb;
b411b363 2461 unsigned int fault_type;
e658983a 2462 struct p_block_req *p = pi->data;
4a76b161 2463
9f4fe9ad
AG
2464 peer_device = conn_peer_device(connection, pi->vnr);
2465 if (!peer_device)
4a76b161 2466 return -EIO;
9f4fe9ad 2467 device = peer_device->device;
b30ab791 2468 capacity = drbd_get_capacity(device->this_bdev);
b411b363
PR
2469
2470 sector = be64_to_cpu(p->sector);
2471 size = be32_to_cpu(p->blksize);
2472
c670a398 2473 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
d0180171 2474 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2475 (unsigned long long)sector, size);
82bc0194 2476 return -EINVAL;
b411b363
PR
2477 }
2478 if (sector + (size>>9) > capacity) {
d0180171 2479 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2480 (unsigned long long)sector, size);
82bc0194 2481 return -EINVAL;
b411b363
PR
2482 }
2483
b30ab791 2484 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
b18b37be 2485 verb = 1;
e2857216 2486 switch (pi->cmd) {
b18b37be 2487 case P_DATA_REQUEST:
69a22773 2488 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
b18b37be
PR
2489 break;
2490 case P_RS_DATA_REQUEST:
2491 case P_CSUM_RS_REQUEST:
2492 case P_OV_REQUEST:
69a22773 2493 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
b18b37be
PR
2494 break;
2495 case P_OV_REPLY:
2496 verb = 0;
b30ab791 2497 dec_rs_pending(device);
69a22773 2498 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
b18b37be
PR
2499 break;
2500 default:
49ba9b1b 2501 BUG();
b18b37be
PR
2502 }
2503 if (verb && __ratelimit(&drbd_ratelimit_state))
d0180171 2504 drbd_err(device, "Can not satisfy peer's read request, "
b411b363 2505 "no local data.\n");
b18b37be 2506
a821cc4a 2507 /* drain possibly payload */
69a22773 2508 return drbd_drain_block(peer_device, pi->size);
b411b363
PR
2509 }
2510
2511 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2512 * "criss-cross" setup, that might cause write-out on some other DRBD,
2513 * which in turn might block on the other node at this very place. */
a0fb3c47
LE
2514 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2515 true /* has real payload */, GFP_NOIO);
db830c46 2516 if (!peer_req) {
b30ab791 2517 put_ldev(device);
82bc0194 2518 return -ENOMEM;
b411b363
PR
2519 }
2520
e2857216 2521 switch (pi->cmd) {
b411b363 2522 case P_DATA_REQUEST:
a8cd15ba 2523 peer_req->w.cb = w_e_end_data_req;
b411b363 2524 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2525 /* application IO, don't drbd_rs_begin_io */
2526 goto submit;
2527
b411b363 2528 case P_RS_DATA_REQUEST:
a8cd15ba 2529 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2530 fault_type = DRBD_FAULT_RS_RD;
5f9915bb 2531 /* used in the sector offset progress display */
b30ab791 2532 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2533 break;
2534
2535 case P_OV_REPLY:
2536 case P_CSUM_RS_REQUEST:
2537 fault_type = DRBD_FAULT_RS_RD;
e2857216 2538 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
b411b363
PR
2539 if (!di)
2540 goto out_free_e;
2541
e2857216 2542 di->digest_size = pi->size;
b411b363
PR
2543 di->digest = (((char *)di)+sizeof(struct digest_info));
2544
db830c46
AG
2545 peer_req->digest = di;
2546 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2547
9f4fe9ad 2548 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
b411b363
PR
2549 goto out_free_e;
2550
e2857216 2551 if (pi->cmd == P_CSUM_RS_REQUEST) {
9f4fe9ad 2552 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
a8cd15ba 2553 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb 2554 /* used in the sector offset progress display */
b30ab791 2555 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
e2857216 2556 } else if (pi->cmd == P_OV_REPLY) {
2649f080 2557 /* track progress, we may need to throttle */
b30ab791 2558 atomic_add(size >> 9, &device->rs_sect_in);
a8cd15ba 2559 peer_req->w.cb = w_e_end_ov_reply;
b30ab791 2560 dec_rs_pending(device);
0f0601f4
LE
2561 /* drbd_rs_begin_io done when we sent this request,
2562 * but accounting still needs to be done. */
2563 goto submit_for_resync;
b411b363
PR
2564 }
2565 break;
2566
2567 case P_OV_REQUEST:
b30ab791 2568 if (device->ov_start_sector == ~(sector_t)0 &&
9f4fe9ad 2569 peer_device->connection->agreed_pro_version >= 90) {
de228bba
LE
2570 unsigned long now = jiffies;
2571 int i;
b30ab791
AG
2572 device->ov_start_sector = sector;
2573 device->ov_position = sector;
2574 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2575 device->rs_total = device->ov_left;
de228bba 2576 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
b30ab791
AG
2577 device->rs_mark_left[i] = device->ov_left;
2578 device->rs_mark_time[i] = now;
de228bba 2579 }
d0180171 2580 drbd_info(device, "Online Verify start sector: %llu\n",
b411b363
PR
2581 (unsigned long long)sector);
2582 }
a8cd15ba 2583 peer_req->w.cb = w_e_end_ov_req;
b411b363 2584 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2585 break;
2586
b411b363 2587 default:
49ba9b1b 2588 BUG();
b411b363
PR
2589 }
2590
0f0601f4
LE
2591 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2592 * wrt the receiver, but it is not as straightforward as it may seem.
2593 * Various places in the resync start and stop logic assume resync
2594 * requests are processed in order, requeuing this on the worker thread
2595 * introduces a bunch of new code for synchronization between threads.
2596 *
2597 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2598 * "forever", throttling after drbd_rs_begin_io will lock that extent
2599 * for application writes for the same time. For now, just throttle
2600 * here, where the rest of the code expects the receiver to sleep for
2601 * a while, anyways.
2602 */
2603
2604 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2605 * this defers syncer requests for some time, before letting at least
2606 * on request through. The resync controller on the receiving side
2607 * will adapt to the incoming rate accordingly.
2608 *
2609 * We cannot throttle here if remote is Primary/SyncTarget:
2610 * we would also throttle its application reads.
2611 * In that case, throttling is done on the SyncTarget only.
2612 */
b30ab791 2613 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
e3555d85 2614 schedule_timeout_uninterruptible(HZ/10);
b30ab791 2615 if (drbd_rs_begin_io(device, sector))
80a40e43 2616 goto out_free_e;
b411b363 2617
0f0601f4 2618submit_for_resync:
b30ab791 2619 atomic_add(size >> 9, &device->rs_sect_ev);
0f0601f4 2620
80a40e43 2621submit:
b30ab791 2622 inc_unacked(device);
0500813f 2623 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2624 list_add_tail(&peer_req->w.list, &device->read_ee);
0500813f 2625 spin_unlock_irq(&device->resource->req_lock);
b411b363 2626
b30ab791 2627 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
82bc0194 2628 return 0;
b411b363 2629
10f6d992 2630 /* don't care for the reason here */
d0180171 2631 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2632 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2633 list_del(&peer_req->w.list);
0500813f 2634 spin_unlock_irq(&device->resource->req_lock);
22cc37a9
LE
2635 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2636
b411b363 2637out_free_e:
b30ab791
AG
2638 put_ldev(device);
2639 drbd_free_peer_req(device, peer_req);
82bc0194 2640 return -EIO;
b411b363
PR
2641}
2642
69a22773
AG
2643/**
2644 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2645 */
2646static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2647{
69a22773 2648 struct drbd_device *device = peer_device->device;
b411b363
PR
2649 int self, peer, rv = -100;
2650 unsigned long ch_self, ch_peer;
44ed167d 2651 enum drbd_after_sb_p after_sb_0p;
b411b363 2652
b30ab791
AG
2653 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2654 peer = device->p_uuid[UI_BITMAP] & 1;
b411b363 2655
b30ab791
AG
2656 ch_peer = device->p_uuid[UI_SIZE];
2657 ch_self = device->comm_bm_set;
b411b363 2658
44ed167d 2659 rcu_read_lock();
69a22773 2660 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
44ed167d
PR
2661 rcu_read_unlock();
2662 switch (after_sb_0p) {
b411b363
PR
2663 case ASB_CONSENSUS:
2664 case ASB_DISCARD_SECONDARY:
2665 case ASB_CALL_HELPER:
44ed167d 2666 case ASB_VIOLENTLY:
d0180171 2667 drbd_err(device, "Configuration error.\n");
b411b363
PR
2668 break;
2669 case ASB_DISCONNECT:
2670 break;
2671 case ASB_DISCARD_YOUNGER_PRI:
2672 if (self == 0 && peer == 1) {
2673 rv = -1;
2674 break;
2675 }
2676 if (self == 1 && peer == 0) {
2677 rv = 1;
2678 break;
2679 }
2680 /* Else fall through to one of the other strategies... */
2681 case ASB_DISCARD_OLDER_PRI:
2682 if (self == 0 && peer == 1) {
2683 rv = 1;
2684 break;
2685 }
2686 if (self == 1 && peer == 0) {
2687 rv = -1;
2688 break;
2689 }
2690 /* Else fall through to one of the other strategies... */
d0180171 2691 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
b411b363
PR
2692 "Using discard-least-changes instead\n");
2693 case ASB_DISCARD_ZERO_CHG:
2694 if (ch_peer == 0 && ch_self == 0) {
69a22773 2695 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
b411b363
PR
2696 ? -1 : 1;
2697 break;
2698 } else {
2699 if (ch_peer == 0) { rv = 1; break; }
2700 if (ch_self == 0) { rv = -1; break; }
2701 }
44ed167d 2702 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
b411b363
PR
2703 break;
2704 case ASB_DISCARD_LEAST_CHG:
2705 if (ch_self < ch_peer)
2706 rv = -1;
2707 else if (ch_self > ch_peer)
2708 rv = 1;
2709 else /* ( ch_self == ch_peer ) */
2710 /* Well, then use something else. */
69a22773 2711 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
b411b363
PR
2712 ? -1 : 1;
2713 break;
2714 case ASB_DISCARD_LOCAL:
2715 rv = -1;
2716 break;
2717 case ASB_DISCARD_REMOTE:
2718 rv = 1;
2719 }
2720
2721 return rv;
2722}
2723
69a22773
AG
2724/**
2725 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2726 */
2727static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2728{
69a22773 2729 struct drbd_device *device = peer_device->device;
6184ea21 2730 int hg, rv = -100;
44ed167d 2731 enum drbd_after_sb_p after_sb_1p;
b411b363 2732
44ed167d 2733 rcu_read_lock();
69a22773 2734 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
44ed167d
PR
2735 rcu_read_unlock();
2736 switch (after_sb_1p) {
b411b363
PR
2737 case ASB_DISCARD_YOUNGER_PRI:
2738 case ASB_DISCARD_OLDER_PRI:
2739 case ASB_DISCARD_LEAST_CHG:
2740 case ASB_DISCARD_LOCAL:
2741 case ASB_DISCARD_REMOTE:
44ed167d 2742 case ASB_DISCARD_ZERO_CHG:
d0180171 2743 drbd_err(device, "Configuration error.\n");
b411b363
PR
2744 break;
2745 case ASB_DISCONNECT:
2746 break;
2747 case ASB_CONSENSUS:
69a22773 2748 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2749 if (hg == -1 && device->state.role == R_SECONDARY)
b411b363 2750 rv = hg;
b30ab791 2751 if (hg == 1 && device->state.role == R_PRIMARY)
b411b363
PR
2752 rv = hg;
2753 break;
2754 case ASB_VIOLENTLY:
69a22773 2755 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2756 break;
2757 case ASB_DISCARD_SECONDARY:
b30ab791 2758 return device->state.role == R_PRIMARY ? 1 : -1;
b411b363 2759 case ASB_CALL_HELPER:
69a22773 2760 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2761 if (hg == -1 && device->state.role == R_PRIMARY) {
bb437946
AG
2762 enum drbd_state_rv rv2;
2763
b411b363
PR
2764 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2765 * we might be here in C_WF_REPORT_PARAMS which is transient.
2766 * we do not need to wait for the after state change work either. */
b30ab791 2767 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2768 if (rv2 != SS_SUCCESS) {
b30ab791 2769 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2770 } else {
d0180171 2771 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2772 rv = hg;
2773 }
2774 } else
2775 rv = hg;
2776 }
2777
2778 return rv;
2779}
2780
69a22773
AG
2781/**
2782 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2783 */
2784static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2785{
69a22773 2786 struct drbd_device *device = peer_device->device;
6184ea21 2787 int hg, rv = -100;
44ed167d 2788 enum drbd_after_sb_p after_sb_2p;
b411b363 2789
44ed167d 2790 rcu_read_lock();
69a22773 2791 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
44ed167d
PR
2792 rcu_read_unlock();
2793 switch (after_sb_2p) {
b411b363
PR
2794 case ASB_DISCARD_YOUNGER_PRI:
2795 case ASB_DISCARD_OLDER_PRI:
2796 case ASB_DISCARD_LEAST_CHG:
2797 case ASB_DISCARD_LOCAL:
2798 case ASB_DISCARD_REMOTE:
2799 case ASB_CONSENSUS:
2800 case ASB_DISCARD_SECONDARY:
44ed167d 2801 case ASB_DISCARD_ZERO_CHG:
d0180171 2802 drbd_err(device, "Configuration error.\n");
b411b363
PR
2803 break;
2804 case ASB_VIOLENTLY:
69a22773 2805 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2806 break;
2807 case ASB_DISCONNECT:
2808 break;
2809 case ASB_CALL_HELPER:
69a22773 2810 hg = drbd_asb_recover_0p(peer_device);
b411b363 2811 if (hg == -1) {
bb437946
AG
2812 enum drbd_state_rv rv2;
2813
b411b363
PR
2814 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2815 * we might be here in C_WF_REPORT_PARAMS which is transient.
2816 * we do not need to wait for the after state change work either. */
b30ab791 2817 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2818 if (rv2 != SS_SUCCESS) {
b30ab791 2819 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2820 } else {
d0180171 2821 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2822 rv = hg;
2823 }
2824 } else
2825 rv = hg;
2826 }
2827
2828 return rv;
2829}
2830
b30ab791 2831static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
b411b363
PR
2832 u64 bits, u64 flags)
2833{
2834 if (!uuid) {
d0180171 2835 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
b411b363
PR
2836 return;
2837 }
d0180171 2838 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
b411b363
PR
2839 text,
2840 (unsigned long long)uuid[UI_CURRENT],
2841 (unsigned long long)uuid[UI_BITMAP],
2842 (unsigned long long)uuid[UI_HISTORY_START],
2843 (unsigned long long)uuid[UI_HISTORY_END],
2844 (unsigned long long)bits,
2845 (unsigned long long)flags);
2846}
2847
2848/*
2849 100 after split brain try auto recover
2850 2 C_SYNC_SOURCE set BitMap
2851 1 C_SYNC_SOURCE use BitMap
2852 0 no Sync
2853 -1 C_SYNC_TARGET use BitMap
2854 -2 C_SYNC_TARGET set BitMap
2855 -100 after split brain, disconnect
2856-1000 unrelated data
4a23f264
PR
2857-1091 requires proto 91
2858-1096 requires proto 96
b411b363 2859 */
44a4d551 2860static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
b411b363 2861{
44a4d551
LE
2862 struct drbd_peer_device *const peer_device = first_peer_device(device);
2863 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
b411b363
PR
2864 u64 self, peer;
2865 int i, j;
2866
b30ab791
AG
2867 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2868 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2869
2870 *rule_nr = 10;
2871 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2872 return 0;
2873
2874 *rule_nr = 20;
2875 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2876 peer != UUID_JUST_CREATED)
2877 return -2;
2878
2879 *rule_nr = 30;
2880 if (self != UUID_JUST_CREATED &&
2881 (peer == UUID_JUST_CREATED || peer == (u64)0))
2882 return 2;
2883
2884 if (self == peer) {
2885 int rct, dc; /* roles at crash time */
2886
b30ab791 2887 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
b411b363 2888
44a4d551 2889 if (connection->agreed_pro_version < 91)
4a23f264 2890 return -1091;
b411b363 2891
b30ab791
AG
2892 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2893 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
d0180171 2894 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
b30ab791
AG
2895 drbd_uuid_move_history(device);
2896 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2897 device->ldev->md.uuid[UI_BITMAP] = 0;
b411b363 2898
b30ab791
AG
2899 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2900 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
2901 *rule_nr = 34;
2902 } else {
d0180171 2903 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
b411b363
PR
2904 *rule_nr = 36;
2905 }
2906
2907 return 1;
2908 }
2909
b30ab791 2910 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
b411b363 2911
44a4d551 2912 if (connection->agreed_pro_version < 91)
4a23f264 2913 return -1091;
b411b363 2914
b30ab791
AG
2915 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2916 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
d0180171 2917 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
b411b363 2918
b30ab791
AG
2919 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2920 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2921 device->p_uuid[UI_BITMAP] = 0UL;
b411b363 2922
b30ab791 2923 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
b411b363
PR
2924 *rule_nr = 35;
2925 } else {
d0180171 2926 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
b411b363
PR
2927 *rule_nr = 37;
2928 }
2929
2930 return -1;
2931 }
2932
2933 /* Common power [off|failure] */
b30ab791
AG
2934 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2935 (device->p_uuid[UI_FLAGS] & 2);
b411b363
PR
2936 /* lowest bit is set when we were primary,
2937 * next bit (weight 2) is set when peer was primary */
2938 *rule_nr = 40;
2939
2940 switch (rct) {
2941 case 0: /* !self_pri && !peer_pri */ return 0;
2942 case 1: /* self_pri && !peer_pri */ return 1;
2943 case 2: /* !self_pri && peer_pri */ return -1;
2944 case 3: /* self_pri && peer_pri */
44a4d551 2945 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
b411b363
PR
2946 return dc ? -1 : 1;
2947 }
2948 }
2949
2950 *rule_nr = 50;
b30ab791 2951 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
2952 if (self == peer)
2953 return -1;
2954
2955 *rule_nr = 51;
b30ab791 2956 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2957 if (self == peer) {
44a4d551 2958 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2959 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2960 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2961 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
2962 /* The last P_SYNC_UUID did not get though. Undo the last start of
2963 resync as sync source modifications of the peer's UUIDs. */
2964
44a4d551 2965 if (connection->agreed_pro_version < 91)
4a23f264 2966 return -1091;
b411b363 2967
b30ab791
AG
2968 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2969 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
4a23f264 2970
d0180171 2971 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
b30ab791 2972 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
4a23f264 2973
b411b363
PR
2974 return -1;
2975 }
2976 }
2977
2978 *rule_nr = 60;
b30ab791 2979 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
b411b363 2980 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 2981 peer = device->p_uuid[i] & ~((u64)1);
b411b363
PR
2982 if (self == peer)
2983 return -2;
2984 }
2985
2986 *rule_nr = 70;
b30ab791
AG
2987 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2988 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2989 if (self == peer)
2990 return 1;
2991
2992 *rule_nr = 71;
b30ab791 2993 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2994 if (self == peer) {
44a4d551 2995 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2996 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2997 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2998 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
2999 /* The last P_SYNC_UUID did not get though. Undo the last start of
3000 resync as sync source modifications of our UUIDs. */
3001
44a4d551 3002 if (connection->agreed_pro_version < 91)
4a23f264 3003 return -1091;
b411b363 3004
b30ab791
AG
3005 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3006 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
b411b363 3007
d0180171 3008 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
b30ab791
AG
3009 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3010 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
3011
3012 return 1;
3013 }
3014 }
3015
3016
3017 *rule_nr = 80;
b30ab791 3018 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363 3019 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3020 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363
PR
3021 if (self == peer)
3022 return 2;
3023 }
3024
3025 *rule_nr = 90;
b30ab791
AG
3026 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3027 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
3028 if (self == peer && self != ((u64)0))
3029 return 100;
3030
3031 *rule_nr = 100;
3032 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3033 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363 3034 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
b30ab791 3035 peer = device->p_uuid[j] & ~((u64)1);
b411b363
PR
3036 if (self == peer)
3037 return -100;
3038 }
3039 }
3040
3041 return -1000;
3042}
3043
3044/* drbd_sync_handshake() returns the new conn state on success, or
3045 CONN_MASK (-1) on failure.
3046 */
69a22773
AG
3047static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3048 enum drbd_role peer_role,
b411b363
PR
3049 enum drbd_disk_state peer_disk) __must_hold(local)
3050{
69a22773 3051 struct drbd_device *device = peer_device->device;
b411b363
PR
3052 enum drbd_conns rv = C_MASK;
3053 enum drbd_disk_state mydisk;
44ed167d 3054 struct net_conf *nc;
6dff2902 3055 int hg, rule_nr, rr_conflict, tentative;
b411b363 3056
b30ab791 3057 mydisk = device->state.disk;
b411b363 3058 if (mydisk == D_NEGOTIATING)
b30ab791 3059 mydisk = device->new_state_tmp.disk;
b411b363 3060
d0180171 3061 drbd_info(device, "drbd_sync_handshake:\n");
9f2247bb 3062
b30ab791
AG
3063 spin_lock_irq(&device->ldev->md.uuid_lock);
3064 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3065 drbd_uuid_dump(device, "peer", device->p_uuid,
3066 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
b411b363 3067
b30ab791
AG
3068 hg = drbd_uuid_compare(device, &rule_nr);
3069 spin_unlock_irq(&device->ldev->md.uuid_lock);
b411b363 3070
d0180171 3071 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
b411b363
PR
3072
3073 if (hg == -1000) {
d0180171 3074 drbd_alert(device, "Unrelated data, aborting!\n");
b411b363
PR
3075 return C_MASK;
3076 }
4a23f264 3077 if (hg < -1000) {
d0180171 3078 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
b411b363
PR
3079 return C_MASK;
3080 }
3081
3082 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3083 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3084 int f = (hg == -100) || abs(hg) == 2;
3085 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3086 if (f)
3087 hg = hg*2;
d0180171 3088 drbd_info(device, "Becoming sync %s due to disk states.\n",
b411b363
PR
3089 hg > 0 ? "source" : "target");
3090 }
3091
3a11a487 3092 if (abs(hg) == 100)
b30ab791 3093 drbd_khelper(device, "initial-split-brain");
3a11a487 3094
44ed167d 3095 rcu_read_lock();
69a22773 3096 nc = rcu_dereference(peer_device->connection->net_conf);
44ed167d
PR
3097
3098 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
b30ab791 3099 int pcount = (device->state.role == R_PRIMARY)
b411b363
PR
3100 + (peer_role == R_PRIMARY);
3101 int forced = (hg == -100);
3102
3103 switch (pcount) {
3104 case 0:
69a22773 3105 hg = drbd_asb_recover_0p(peer_device);
b411b363
PR
3106 break;
3107 case 1:
69a22773 3108 hg = drbd_asb_recover_1p(peer_device);
b411b363
PR
3109 break;
3110 case 2:
69a22773 3111 hg = drbd_asb_recover_2p(peer_device);
b411b363
PR
3112 break;
3113 }
3114 if (abs(hg) < 100) {
d0180171 3115 drbd_warn(device, "Split-Brain detected, %d primaries, "
b411b363
PR
3116 "automatically solved. Sync from %s node\n",
3117 pcount, (hg < 0) ? "peer" : "this");
3118 if (forced) {
d0180171 3119 drbd_warn(device, "Doing a full sync, since"
b411b363
PR
3120 " UUIDs where ambiguous.\n");
3121 hg = hg*2;
3122 }
3123 }
3124 }
3125
3126 if (hg == -100) {
b30ab791 3127 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
b411b363 3128 hg = -1;
b30ab791 3129 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
b411b363
PR
3130 hg = 1;
3131
3132 if (abs(hg) < 100)
d0180171 3133 drbd_warn(device, "Split-Brain detected, manually solved. "
b411b363
PR
3134 "Sync from %s node\n",
3135 (hg < 0) ? "peer" : "this");
3136 }
44ed167d 3137 rr_conflict = nc->rr_conflict;
6dff2902 3138 tentative = nc->tentative;
44ed167d 3139 rcu_read_unlock();
b411b363
PR
3140
3141 if (hg == -100) {
580b9767
LE
3142 /* FIXME this log message is not correct if we end up here
3143 * after an attempted attach on a diskless node.
3144 * We just refuse to attach -- well, we drop the "connection"
3145 * to that disk, in a way... */
d0180171 3146 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
b30ab791 3147 drbd_khelper(device, "split-brain");
b411b363
PR
3148 return C_MASK;
3149 }
3150
3151 if (hg > 0 && mydisk <= D_INCONSISTENT) {
d0180171 3152 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
b411b363
PR
3153 return C_MASK;
3154 }
3155
3156 if (hg < 0 && /* by intention we do not use mydisk here. */
b30ab791 3157 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
44ed167d 3158 switch (rr_conflict) {
b411b363 3159 case ASB_CALL_HELPER:
b30ab791 3160 drbd_khelper(device, "pri-lost");
b411b363
PR
3161 /* fall through */
3162 case ASB_DISCONNECT:
d0180171 3163 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
b411b363
PR
3164 return C_MASK;
3165 case ASB_VIOLENTLY:
d0180171 3166 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
b411b363
PR
3167 "assumption\n");
3168 }
3169 }
3170
69a22773 3171 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
cf14c2e9 3172 if (hg == 0)
d0180171 3173 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
cf14c2e9 3174 else
d0180171 3175 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
cf14c2e9
PR
3176 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3177 abs(hg) >= 2 ? "full" : "bit-map based");
3178 return C_MASK;
3179 }
3180
b411b363 3181 if (abs(hg) >= 2) {
d0180171 3182 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
b30ab791 3183 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
20ceb2b2 3184 BM_LOCKED_SET_ALLOWED))
b411b363
PR
3185 return C_MASK;
3186 }
3187
3188 if (hg > 0) { /* become sync source. */
3189 rv = C_WF_BITMAP_S;
3190 } else if (hg < 0) { /* become sync target */
3191 rv = C_WF_BITMAP_T;
3192 } else {
3193 rv = C_CONNECTED;
b30ab791 3194 if (drbd_bm_total_weight(device)) {
d0180171 3195 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
b30ab791 3196 drbd_bm_total_weight(device));
b411b363
PR
3197 }
3198 }
3199
3200 return rv;
3201}
3202
f179d76d 3203static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3204{
3205 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3206 if (peer == ASB_DISCARD_REMOTE)
3207 return ASB_DISCARD_LOCAL;
b411b363
PR
3208
3209 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3210 if (peer == ASB_DISCARD_LOCAL)
3211 return ASB_DISCARD_REMOTE;
b411b363
PR
3212
3213 /* everything else is valid if they are equal on both sides. */
f179d76d 3214 return peer;
b411b363
PR
3215}
3216
bde89a9e 3217static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3218{
e658983a 3219 struct p_protocol *p = pi->data;
036b17ea
PR
3220 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3221 int p_proto, p_discard_my_data, p_two_primaries, cf;
3222 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3223 char integrity_alg[SHARED_SECRET_MAX] = "";
accdbcc5 3224 struct crypto_hash *peer_integrity_tfm = NULL;
7aca6c75 3225 void *int_dig_in = NULL, *int_dig_vv = NULL;
b411b363 3226
b411b363
PR
3227 p_proto = be32_to_cpu(p->protocol);
3228 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3229 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3230 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
b411b363 3231 p_two_primaries = be32_to_cpu(p->two_primaries);
cf14c2e9 3232 cf = be32_to_cpu(p->conn_flags);
6139f60d 3233 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
cf14c2e9 3234
bde89a9e 3235 if (connection->agreed_pro_version >= 87) {
86db0618 3236 int err;
cf14c2e9 3237
88104ca4 3238 if (pi->size > sizeof(integrity_alg))
86db0618 3239 return -EIO;
bde89a9e 3240 err = drbd_recv_all(connection, integrity_alg, pi->size);
86db0618
AG
3241 if (err)
3242 return err;
036b17ea 3243 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
b411b363
PR
3244 }
3245
7d4c782c 3246 if (pi->cmd != P_PROTOCOL_UPDATE) {
bde89a9e 3247 clear_bit(CONN_DRY_RUN, &connection->flags);
b411b363 3248
fbc12f45 3249 if (cf & CF_DRY_RUN)
bde89a9e 3250 set_bit(CONN_DRY_RUN, &connection->flags);
b411b363 3251
fbc12f45 3252 rcu_read_lock();
bde89a9e 3253 nc = rcu_dereference(connection->net_conf);
b411b363 3254
fbc12f45 3255 if (p_proto != nc->wire_protocol) {
1ec861eb 3256 drbd_err(connection, "incompatible %s settings\n", "protocol");
fbc12f45
AG
3257 goto disconnect_rcu_unlock;
3258 }
b411b363 3259
fbc12f45 3260 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
1ec861eb 3261 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
fbc12f45
AG
3262 goto disconnect_rcu_unlock;
3263 }
b411b363 3264
fbc12f45 3265 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
1ec861eb 3266 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
fbc12f45
AG
3267 goto disconnect_rcu_unlock;
3268 }
b411b363 3269
fbc12f45 3270 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
1ec861eb 3271 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
fbc12f45
AG
3272 goto disconnect_rcu_unlock;
3273 }
b411b363 3274
fbc12f45 3275 if (p_discard_my_data && nc->discard_my_data) {
1ec861eb 3276 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
fbc12f45
AG
3277 goto disconnect_rcu_unlock;
3278 }
b411b363 3279
fbc12f45 3280 if (p_two_primaries != nc->two_primaries) {
1ec861eb 3281 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
fbc12f45
AG
3282 goto disconnect_rcu_unlock;
3283 }
b411b363 3284
fbc12f45 3285 if (strcmp(integrity_alg, nc->integrity_alg)) {
1ec861eb 3286 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
fbc12f45
AG
3287 goto disconnect_rcu_unlock;
3288 }
b411b363 3289
fbc12f45 3290 rcu_read_unlock();
b411b363
PR
3291 }
3292
7d4c782c
AG
3293 if (integrity_alg[0]) {
3294 int hash_size;
3295
3296 /*
3297 * We can only change the peer data integrity algorithm
3298 * here. Changing our own data integrity algorithm
3299 * requires that we send a P_PROTOCOL_UPDATE packet at
3300 * the same time; otherwise, the peer has no way to
3301 * tell between which packets the algorithm should
3302 * change.
3303 */
b411b363 3304
7d4c782c
AG
3305 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3306 if (!peer_integrity_tfm) {
1ec861eb 3307 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
7d4c782c
AG
3308 integrity_alg);
3309 goto disconnect;
3310 }
b411b363 3311
7d4c782c
AG
3312 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3313 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3314 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3315 if (!(int_dig_in && int_dig_vv)) {
1ec861eb 3316 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
b411b363
PR
3317 goto disconnect;
3318 }
b411b363
PR
3319 }
3320
7d4c782c
AG
3321 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3322 if (!new_net_conf) {
1ec861eb 3323 drbd_err(connection, "Allocation of new net_conf failed\n");
7d4c782c
AG
3324 goto disconnect;
3325 }
3326
bde89a9e 3327 mutex_lock(&connection->data.mutex);
0500813f 3328 mutex_lock(&connection->resource->conf_update);
bde89a9e 3329 old_net_conf = connection->net_conf;
7d4c782c
AG
3330 *new_net_conf = *old_net_conf;
3331
3332 new_net_conf->wire_protocol = p_proto;
3333 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3334 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3335 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3336 new_net_conf->two_primaries = p_two_primaries;
3337
bde89a9e 3338 rcu_assign_pointer(connection->net_conf, new_net_conf);
0500813f 3339 mutex_unlock(&connection->resource->conf_update);
bde89a9e 3340 mutex_unlock(&connection->data.mutex);
7d4c782c 3341
bde89a9e
AG
3342 crypto_free_hash(connection->peer_integrity_tfm);
3343 kfree(connection->int_dig_in);
3344 kfree(connection->int_dig_vv);
3345 connection->peer_integrity_tfm = peer_integrity_tfm;
3346 connection->int_dig_in = int_dig_in;
3347 connection->int_dig_vv = int_dig_vv;
7d4c782c
AG
3348
3349 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
1ec861eb 3350 drbd_info(connection, "peer data-integrity-alg: %s\n",
7d4c782c
AG
3351 integrity_alg[0] ? integrity_alg : "(none)");
3352
3353 synchronize_rcu();
3354 kfree(old_net_conf);
82bc0194 3355 return 0;
b411b363 3356
44ed167d
PR
3357disconnect_rcu_unlock:
3358 rcu_read_unlock();
b411b363 3359disconnect:
b792c35c 3360 crypto_free_hash(peer_integrity_tfm);
036b17ea
PR
3361 kfree(int_dig_in);
3362 kfree(int_dig_vv);
bde89a9e 3363 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3364 return -EIO;
b411b363
PR
3365}
3366
3367/* helper function
3368 * input: alg name, feature name
3369 * return: NULL (alg name was "")
3370 * ERR_PTR(error) if something goes wrong
3371 * or the crypto hash ptr, if it worked out ok. */
f63e631a 3372static
b30ab791 3373struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
b411b363
PR
3374 const char *alg, const char *name)
3375{
3376 struct crypto_hash *tfm;
3377
3378 if (!alg[0])
3379 return NULL;
3380
3381 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3382 if (IS_ERR(tfm)) {
d0180171 3383 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
b411b363
PR
3384 alg, name, PTR_ERR(tfm));
3385 return tfm;
3386 }
b411b363
PR
3387 return tfm;
3388}
3389
bde89a9e 3390static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
4a76b161 3391{
bde89a9e 3392 void *buffer = connection->data.rbuf;
4a76b161
AG
3393 int size = pi->size;
3394
3395 while (size) {
3396 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
bde89a9e 3397 s = drbd_recv(connection, buffer, s);
4a76b161
AG
3398 if (s <= 0) {
3399 if (s < 0)
3400 return s;
3401 break;
3402 }
3403 size -= s;
3404 }
3405 if (size)
3406 return -EIO;
3407 return 0;
3408}
3409
3410/*
3411 * config_unknown_volume - device configuration command for unknown volume
3412 *
3413 * When a device is added to an existing connection, the node on which the
3414 * device is added first will send configuration commands to its peer but the
3415 * peer will not know about the device yet. It will warn and ignore these
3416 * commands. Once the device is added on the second node, the second node will
3417 * send the same device configuration commands, but in the other direction.
3418 *
3419 * (We can also end up here if drbd is misconfigured.)
3420 */
bde89a9e 3421static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
4a76b161 3422{
1ec861eb 3423 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
2fcb8f30 3424 cmdname(pi->cmd), pi->vnr);
bde89a9e 3425 return ignore_remaining_packet(connection, pi);
4a76b161
AG
3426}
3427
bde89a9e 3428static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3429{
9f4fe9ad 3430 struct drbd_peer_device *peer_device;
b30ab791 3431 struct drbd_device *device;
e658983a 3432 struct p_rs_param_95 *p;
b411b363
PR
3433 unsigned int header_size, data_size, exp_max_sz;
3434 struct crypto_hash *verify_tfm = NULL;
3435 struct crypto_hash *csums_tfm = NULL;
2ec91e0e 3436 struct net_conf *old_net_conf, *new_net_conf = NULL;
813472ce 3437 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
bde89a9e 3438 const int apv = connection->agreed_pro_version;
813472ce 3439 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
778f271d 3440 int fifo_size = 0;
82bc0194 3441 int err;
b411b363 3442
9f4fe9ad
AG
3443 peer_device = conn_peer_device(connection, pi->vnr);
3444 if (!peer_device)
bde89a9e 3445 return config_unknown_volume(connection, pi);
9f4fe9ad 3446 device = peer_device->device;
b411b363
PR
3447
3448 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3449 : apv == 88 ? sizeof(struct p_rs_param)
3450 + SHARED_SECRET_MAX
8e26f9cc
PR
3451 : apv <= 94 ? sizeof(struct p_rs_param_89)
3452 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
b411b363 3453
e2857216 3454 if (pi->size > exp_max_sz) {
d0180171 3455 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
e2857216 3456 pi->size, exp_max_sz);
82bc0194 3457 return -EIO;
b411b363
PR
3458 }
3459
3460 if (apv <= 88) {
e658983a 3461 header_size = sizeof(struct p_rs_param);
e2857216 3462 data_size = pi->size - header_size;
8e26f9cc 3463 } else if (apv <= 94) {
e658983a 3464 header_size = sizeof(struct p_rs_param_89);
e2857216 3465 data_size = pi->size - header_size;
0b0ba1ef 3466 D_ASSERT(device, data_size == 0);
8e26f9cc 3467 } else {
e658983a 3468 header_size = sizeof(struct p_rs_param_95);
e2857216 3469 data_size = pi->size - header_size;
0b0ba1ef 3470 D_ASSERT(device, data_size == 0);
b411b363
PR
3471 }
3472
3473 /* initialize verify_alg and csums_alg */
e658983a 3474 p = pi->data;
b411b363
PR
3475 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3476
9f4fe9ad 3477 err = drbd_recv_all(peer_device->connection, p, header_size);
82bc0194
AG
3478 if (err)
3479 return err;
b411b363 3480
0500813f 3481 mutex_lock(&connection->resource->conf_update);
9f4fe9ad 3482 old_net_conf = peer_device->connection->net_conf;
b30ab791 3483 if (get_ldev(device)) {
813472ce
PR
3484 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3485 if (!new_disk_conf) {
b30ab791 3486 put_ldev(device);
0500813f 3487 mutex_unlock(&connection->resource->conf_update);
d0180171 3488 drbd_err(device, "Allocation of new disk_conf failed\n");
813472ce
PR
3489 return -ENOMEM;
3490 }
daeda1cc 3491
b30ab791 3492 old_disk_conf = device->ldev->disk_conf;
813472ce 3493 *new_disk_conf = *old_disk_conf;
b411b363 3494
6394b935 3495 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
813472ce 3496 }
b411b363
PR
3497
3498 if (apv >= 88) {
3499 if (apv == 88) {
5de73827 3500 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
d0180171 3501 drbd_err(device, "verify-alg of wrong size, "
5de73827
PR
3502 "peer wants %u, accepting only up to %u byte\n",
3503 data_size, SHARED_SECRET_MAX);
813472ce
PR
3504 err = -EIO;
3505 goto reconnect;
b411b363
PR
3506 }
3507
9f4fe9ad 3508 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
813472ce
PR
3509 if (err)
3510 goto reconnect;
b411b363
PR
3511 /* we expect NUL terminated string */
3512 /* but just in case someone tries to be evil */
0b0ba1ef 3513 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
b411b363
PR
3514 p->verify_alg[data_size-1] = 0;
3515
3516 } else /* apv >= 89 */ {
3517 /* we still expect NUL terminated strings */
3518 /* but just in case someone tries to be evil */
0b0ba1ef
AG
3519 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3520 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
b411b363
PR
3521 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3522 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3523 }
3524
2ec91e0e 3525 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
b30ab791 3526 if (device->state.conn == C_WF_REPORT_PARAMS) {
d0180171 3527 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3528 old_net_conf->verify_alg, p->verify_alg);
b411b363
PR
3529 goto disconnect;
3530 }
b30ab791 3531 verify_tfm = drbd_crypto_alloc_digest_safe(device,
b411b363
PR
3532 p->verify_alg, "verify-alg");
3533 if (IS_ERR(verify_tfm)) {
3534 verify_tfm = NULL;
3535 goto disconnect;
3536 }
3537 }
3538
2ec91e0e 3539 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
b30ab791 3540 if (device->state.conn == C_WF_REPORT_PARAMS) {
d0180171 3541 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3542 old_net_conf->csums_alg, p->csums_alg);
b411b363
PR
3543 goto disconnect;
3544 }
b30ab791 3545 csums_tfm = drbd_crypto_alloc_digest_safe(device,
b411b363
PR
3546 p->csums_alg, "csums-alg");
3547 if (IS_ERR(csums_tfm)) {
3548 csums_tfm = NULL;
3549 goto disconnect;
3550 }
3551 }
3552
813472ce 3553 if (apv > 94 && new_disk_conf) {
daeda1cc
PR
3554 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3555 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3556 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3557 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
778f271d 3558
daeda1cc 3559 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
b30ab791 3560 if (fifo_size != device->rs_plan_s->size) {
813472ce
PR
3561 new_plan = fifo_alloc(fifo_size);
3562 if (!new_plan) {
d0180171 3563 drbd_err(device, "kmalloc of fifo_buffer failed");
b30ab791 3564 put_ldev(device);
778f271d
PR
3565 goto disconnect;
3566 }
3567 }
8e26f9cc 3568 }
b411b363 3569
91fd4dad 3570 if (verify_tfm || csums_tfm) {
2ec91e0e
PR
3571 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3572 if (!new_net_conf) {
d0180171 3573 drbd_err(device, "Allocation of new net_conf failed\n");
91fd4dad
PR
3574 goto disconnect;
3575 }
3576
2ec91e0e 3577 *new_net_conf = *old_net_conf;
91fd4dad
PR
3578
3579 if (verify_tfm) {
2ec91e0e
PR
3580 strcpy(new_net_conf->verify_alg, p->verify_alg);
3581 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
9f4fe9ad
AG
3582 crypto_free_hash(peer_device->connection->verify_tfm);
3583 peer_device->connection->verify_tfm = verify_tfm;
d0180171 3584 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
91fd4dad
PR
3585 }
3586 if (csums_tfm) {
2ec91e0e
PR
3587 strcpy(new_net_conf->csums_alg, p->csums_alg);
3588 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
9f4fe9ad
AG
3589 crypto_free_hash(peer_device->connection->csums_tfm);
3590 peer_device->connection->csums_tfm = csums_tfm;
d0180171 3591 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
91fd4dad 3592 }
bde89a9e 3593 rcu_assign_pointer(connection->net_conf, new_net_conf);
778f271d 3594 }
b411b363
PR
3595 }
3596
813472ce 3597 if (new_disk_conf) {
b30ab791
AG
3598 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3599 put_ldev(device);
813472ce
PR
3600 }
3601
3602 if (new_plan) {
b30ab791
AG
3603 old_plan = device->rs_plan_s;
3604 rcu_assign_pointer(device->rs_plan_s, new_plan);
b411b363 3605 }
daeda1cc 3606
0500813f 3607 mutex_unlock(&connection->resource->conf_update);
daeda1cc
PR
3608 synchronize_rcu();
3609 if (new_net_conf)
3610 kfree(old_net_conf);
3611 kfree(old_disk_conf);
813472ce 3612 kfree(old_plan);
daeda1cc 3613
82bc0194 3614 return 0;
b411b363 3615
813472ce
PR
3616reconnect:
3617 if (new_disk_conf) {
b30ab791 3618 put_ldev(device);
813472ce
PR
3619 kfree(new_disk_conf);
3620 }
0500813f 3621 mutex_unlock(&connection->resource->conf_update);
813472ce
PR
3622 return -EIO;
3623
b411b363 3624disconnect:
813472ce
PR
3625 kfree(new_plan);
3626 if (new_disk_conf) {
b30ab791 3627 put_ldev(device);
813472ce
PR
3628 kfree(new_disk_conf);
3629 }
0500813f 3630 mutex_unlock(&connection->resource->conf_update);
b411b363
PR
3631 /* just for completeness: actually not needed,
3632 * as this is not reached if csums_tfm was ok. */
3633 crypto_free_hash(csums_tfm);
3634 /* but free the verify_tfm again, if csums_tfm did not work out */
3635 crypto_free_hash(verify_tfm);
9f4fe9ad 3636 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3637 return -EIO;
b411b363
PR
3638}
3639
b411b363 3640/* warn if the arguments differ by more than 12.5% */
b30ab791 3641static void warn_if_differ_considerably(struct drbd_device *device,
b411b363
PR
3642 const char *s, sector_t a, sector_t b)
3643{
3644 sector_t d;
3645 if (a == 0 || b == 0)
3646 return;
3647 d = (a > b) ? (a - b) : (b - a);
3648 if (d > (a>>3) || d > (b>>3))
d0180171 3649 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
b411b363
PR
3650 (unsigned long long)a, (unsigned long long)b);
3651}
3652
bde89a9e 3653static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3654{
9f4fe9ad 3655 struct drbd_peer_device *peer_device;
b30ab791 3656 struct drbd_device *device;
e658983a 3657 struct p_sizes *p = pi->data;
e96c9633 3658 enum determine_dev_size dd = DS_UNCHANGED;
b411b363
PR
3659 sector_t p_size, p_usize, my_usize;
3660 int ldsc = 0; /* local disk size changed */
e89b591c 3661 enum dds_flags ddsf;
b411b363 3662
9f4fe9ad
AG
3663 peer_device = conn_peer_device(connection, pi->vnr);
3664 if (!peer_device)
bde89a9e 3665 return config_unknown_volume(connection, pi);
9f4fe9ad 3666 device = peer_device->device;
4a76b161 3667
b411b363
PR
3668 p_size = be64_to_cpu(p->d_size);
3669 p_usize = be64_to_cpu(p->u_size);
3670
b411b363
PR
3671 /* just store the peer's disk size for now.
3672 * we still need to figure out whether we accept that. */
b30ab791 3673 device->p_size = p_size;
b411b363 3674
b30ab791 3675 if (get_ldev(device)) {
daeda1cc 3676 rcu_read_lock();
b30ab791 3677 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
daeda1cc
PR
3678 rcu_read_unlock();
3679
b30ab791
AG
3680 warn_if_differ_considerably(device, "lower level device sizes",
3681 p_size, drbd_get_max_capacity(device->ldev));
3682 warn_if_differ_considerably(device, "user requested size",
daeda1cc 3683 p_usize, my_usize);
b411b363
PR
3684
3685 /* if this is the first connect, or an otherwise expected
3686 * param exchange, choose the minimum */
b30ab791 3687 if (device->state.conn == C_WF_REPORT_PARAMS)
daeda1cc 3688 p_usize = min_not_zero(my_usize, p_usize);
b411b363
PR
3689
3690 /* Never shrink a device with usable data during connect.
3691 But allow online shrinking if we are connected. */
b30ab791
AG
3692 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3693 drbd_get_capacity(device->this_bdev) &&
3694 device->state.disk >= D_OUTDATED &&
3695 device->state.conn < C_CONNECTED) {
d0180171 3696 drbd_err(device, "The peer's disk size is too small!\n");
9f4fe9ad 3697 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
b30ab791 3698 put_ldev(device);
82bc0194 3699 return -EIO;
b411b363 3700 }
daeda1cc
PR
3701
3702 if (my_usize != p_usize) {
3703 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3704
3705 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3706 if (!new_disk_conf) {
d0180171 3707 drbd_err(device, "Allocation of new disk_conf failed\n");
b30ab791 3708 put_ldev(device);
daeda1cc
PR
3709 return -ENOMEM;
3710 }
3711
0500813f 3712 mutex_lock(&connection->resource->conf_update);
b30ab791 3713 old_disk_conf = device->ldev->disk_conf;
daeda1cc
PR
3714 *new_disk_conf = *old_disk_conf;
3715 new_disk_conf->disk_size = p_usize;
3716
b30ab791 3717 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
0500813f 3718 mutex_unlock(&connection->resource->conf_update);
daeda1cc
PR
3719 synchronize_rcu();
3720 kfree(old_disk_conf);
3721
d0180171 3722 drbd_info(device, "Peer sets u_size to %lu sectors\n",
daeda1cc 3723 (unsigned long)my_usize);
b411b363 3724 }
daeda1cc 3725
b30ab791 3726 put_ldev(device);
b411b363 3727 }
b411b363 3728
20c68fde 3729 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
20c68fde
LE
3730 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3731 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3732 drbd_reconsider_max_bio_size(), we can be sure that after
3733 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3734
e89b591c 3735 ddsf = be16_to_cpu(p->dds_flags);
b30ab791 3736 if (get_ldev(device)) {
8fe39aac 3737 drbd_reconsider_max_bio_size(device, device->ldev);
b30ab791
AG
3738 dd = drbd_determine_dev_size(device, ddsf, NULL);
3739 put_ldev(device);
e96c9633 3740 if (dd == DS_ERROR)
82bc0194 3741 return -EIO;
b30ab791 3742 drbd_md_sync(device);
b411b363
PR
3743 } else {
3744 /* I am diskless, need to accept the peer's size. */
8fe39aac 3745 drbd_reconsider_max_bio_size(device, NULL);
b30ab791 3746 drbd_set_my_capacity(device, p_size);
b411b363
PR
3747 }
3748
b30ab791
AG
3749 if (get_ldev(device)) {
3750 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3751 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
b411b363
PR
3752 ldsc = 1;
3753 }
3754
b30ab791 3755 put_ldev(device);
b411b363
PR
3756 }
3757
b30ab791 3758 if (device->state.conn > C_WF_REPORT_PARAMS) {
b411b363 3759 if (be64_to_cpu(p->c_size) !=
b30ab791 3760 drbd_get_capacity(device->this_bdev) || ldsc) {
b411b363
PR
3761 /* we have different sizes, probably peer
3762 * needs to know my new size... */
69a22773 3763 drbd_send_sizes(peer_device, 0, ddsf);
b411b363 3764 }
b30ab791
AG
3765 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3766 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3767 if (device->state.pdsk >= D_INCONSISTENT &&
3768 device->state.disk >= D_INCONSISTENT) {
e89b591c 3769 if (ddsf & DDSF_NO_RESYNC)
d0180171 3770 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
e89b591c 3771 else
b30ab791 3772 resync_after_online_grow(device);
e89b591c 3773 } else
b30ab791 3774 set_bit(RESYNC_AFTER_NEG, &device->flags);
b411b363
PR
3775 }
3776 }
3777
82bc0194 3778 return 0;
b411b363
PR
3779}
3780
bde89a9e 3781static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3782{
9f4fe9ad 3783 struct drbd_peer_device *peer_device;
b30ab791 3784 struct drbd_device *device;
e658983a 3785 struct p_uuids *p = pi->data;
b411b363 3786 u64 *p_uuid;
62b0da3a 3787 int i, updated_uuids = 0;
b411b363 3788
9f4fe9ad
AG
3789 peer_device = conn_peer_device(connection, pi->vnr);
3790 if (!peer_device)
bde89a9e 3791 return config_unknown_volume(connection, pi);
9f4fe9ad 3792 device = peer_device->device;
4a76b161 3793
b411b363 3794 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
063eacf8 3795 if (!p_uuid) {
d0180171 3796 drbd_err(device, "kmalloc of p_uuid failed\n");
063eacf8
JW
3797 return false;
3798 }
b411b363
PR
3799
3800 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3801 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3802
b30ab791
AG
3803 kfree(device->p_uuid);
3804 device->p_uuid = p_uuid;
b411b363 3805
b30ab791
AG
3806 if (device->state.conn < C_CONNECTED &&
3807 device->state.disk < D_INCONSISTENT &&
3808 device->state.role == R_PRIMARY &&
3809 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
d0180171 3810 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
b30ab791 3811 (unsigned long long)device->ed_uuid);
9f4fe9ad 3812 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3813 return -EIO;
b411b363
PR
3814 }
3815
b30ab791 3816 if (get_ldev(device)) {
b411b363 3817 int skip_initial_sync =
b30ab791 3818 device->state.conn == C_CONNECTED &&
9f4fe9ad 3819 peer_device->connection->agreed_pro_version >= 90 &&
b30ab791 3820 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
b411b363
PR
3821 (p_uuid[UI_FLAGS] & 8);
3822 if (skip_initial_sync) {
d0180171 3823 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
b30ab791 3824 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3825 "clear_n_write from receive_uuids",
3826 BM_LOCKED_TEST_ALLOWED);
b30ab791
AG
3827 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3828 _drbd_uuid_set(device, UI_BITMAP, 0);
3829 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
b411b363 3830 CS_VERBOSE, NULL);
b30ab791 3831 drbd_md_sync(device);
62b0da3a 3832 updated_uuids = 1;
b411b363 3833 }
b30ab791
AG
3834 put_ldev(device);
3835 } else if (device->state.disk < D_INCONSISTENT &&
3836 device->state.role == R_PRIMARY) {
18a50fa2
PR
3837 /* I am a diskless primary, the peer just created a new current UUID
3838 for me. */
b30ab791 3839 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
b411b363
PR
3840 }
3841
3842 /* Before we test for the disk state, we should wait until an eventually
3843 ongoing cluster wide state change is finished. That is important if
3844 we are primary and are detaching from our disk. We need to see the
3845 new disk state... */
b30ab791
AG
3846 mutex_lock(device->state_mutex);
3847 mutex_unlock(device->state_mutex);
3848 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3849 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
62b0da3a
LE
3850
3851 if (updated_uuids)
b30ab791 3852 drbd_print_uuids(device, "receiver updated UUIDs to");
b411b363 3853
82bc0194 3854 return 0;
b411b363
PR
3855}
3856
3857/**
3858 * convert_state() - Converts the peer's view of the cluster state to our point of view
3859 * @ps: The state as seen by the peer.
3860 */
3861static union drbd_state convert_state(union drbd_state ps)
3862{
3863 union drbd_state ms;
3864
3865 static enum drbd_conns c_tab[] = {
369bea63 3866 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
b411b363
PR
3867 [C_CONNECTED] = C_CONNECTED,
3868
3869 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3870 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3871 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3872 [C_VERIFY_S] = C_VERIFY_T,
3873 [C_MASK] = C_MASK,
3874 };
3875
3876 ms.i = ps.i;
3877
3878 ms.conn = c_tab[ps.conn];
3879 ms.peer = ps.role;
3880 ms.role = ps.peer;
3881 ms.pdsk = ps.disk;
3882 ms.disk = ps.pdsk;
3883 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3884
3885 return ms;
3886}
3887
bde89a9e 3888static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3889{
9f4fe9ad 3890 struct drbd_peer_device *peer_device;
b30ab791 3891 struct drbd_device *device;
e658983a 3892 struct p_req_state *p = pi->data;
b411b363 3893 union drbd_state mask, val;
bf885f8a 3894 enum drbd_state_rv rv;
b411b363 3895
9f4fe9ad
AG
3896 peer_device = conn_peer_device(connection, pi->vnr);
3897 if (!peer_device)
4a76b161 3898 return -EIO;
9f4fe9ad 3899 device = peer_device->device;
4a76b161 3900
b411b363
PR
3901 mask.i = be32_to_cpu(p->mask);
3902 val.i = be32_to_cpu(p->val);
3903
9f4fe9ad 3904 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
b30ab791 3905 mutex_is_locked(device->state_mutex)) {
69a22773 3906 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
82bc0194 3907 return 0;
b411b363
PR
3908 }
3909
3910 mask = convert_state(mask);
3911 val = convert_state(val);
3912
b30ab791 3913 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
69a22773 3914 drbd_send_sr_reply(peer_device, rv);
b411b363 3915
b30ab791 3916 drbd_md_sync(device);
b411b363 3917
82bc0194 3918 return 0;
b411b363
PR
3919}
3920
bde89a9e 3921static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3922{
e658983a 3923 struct p_req_state *p = pi->data;
b411b363 3924 union drbd_state mask, val;
bf885f8a 3925 enum drbd_state_rv rv;
b411b363 3926
b411b363
PR
3927 mask.i = be32_to_cpu(p->mask);
3928 val.i = be32_to_cpu(p->val);
3929
bde89a9e
AG
3930 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3931 mutex_is_locked(&connection->cstate_mutex)) {
3932 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
82bc0194 3933 return 0;
b411b363
PR
3934 }
3935
3936 mask = convert_state(mask);
3937 val = convert_state(val);
3938
bde89a9e
AG
3939 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3940 conn_send_sr_reply(connection, rv);
b411b363 3941
82bc0194 3942 return 0;
b411b363
PR
3943}
3944
bde89a9e 3945static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3946{
9f4fe9ad 3947 struct drbd_peer_device *peer_device;
b30ab791 3948 struct drbd_device *device;
e658983a 3949 struct p_state *p = pi->data;
4ac4aada 3950 union drbd_state os, ns, peer_state;
b411b363 3951 enum drbd_disk_state real_peer_disk;
65d922c3 3952 enum chg_state_flags cs_flags;
b411b363
PR
3953 int rv;
3954
9f4fe9ad
AG
3955 peer_device = conn_peer_device(connection, pi->vnr);
3956 if (!peer_device)
bde89a9e 3957 return config_unknown_volume(connection, pi);
9f4fe9ad 3958 device = peer_device->device;
4a76b161 3959
b411b363
PR
3960 peer_state.i = be32_to_cpu(p->state);
3961
3962 real_peer_disk = peer_state.disk;
3963 if (peer_state.disk == D_NEGOTIATING) {
b30ab791 3964 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
d0180171 3965 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
b411b363
PR
3966 }
3967
0500813f 3968 spin_lock_irq(&device->resource->req_lock);
b411b363 3969 retry:
b30ab791 3970 os = ns = drbd_read_state(device);
0500813f 3971 spin_unlock_irq(&device->resource->req_lock);
b411b363 3972
545752d5
LE
3973 /* If some other part of the code (asender thread, timeout)
3974 * already decided to close the connection again,
3975 * we must not "re-establish" it here. */
3976 if (os.conn <= C_TEAR_DOWN)
58ffa580 3977 return -ECONNRESET;
545752d5 3978
40424e4a
LE
3979 /* If this is the "end of sync" confirmation, usually the peer disk
3980 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3981 * set) resync started in PausedSyncT, or if the timing of pause-/
3982 * unpause-sync events has been "just right", the peer disk may
3983 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3984 */
3985 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3986 real_peer_disk == D_UP_TO_DATE &&
e9ef7bb6
LE
3987 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3988 /* If we are (becoming) SyncSource, but peer is still in sync
3989 * preparation, ignore its uptodate-ness to avoid flapping, it
3990 * will change to inconsistent once the peer reaches active
3991 * syncing states.
3992 * It may have changed syncer-paused flags, however, so we
3993 * cannot ignore this completely. */
3994 if (peer_state.conn > C_CONNECTED &&
3995 peer_state.conn < C_SYNC_SOURCE)
3996 real_peer_disk = D_INCONSISTENT;
3997
3998 /* if peer_state changes to connected at the same time,
3999 * it explicitly notifies us that it finished resync.
4000 * Maybe we should finish it up, too? */
4001 else if (os.conn >= C_SYNC_SOURCE &&
4002 peer_state.conn == C_CONNECTED) {
b30ab791
AG
4003 if (drbd_bm_total_weight(device) <= device->rs_failed)
4004 drbd_resync_finished(device);
82bc0194 4005 return 0;
e9ef7bb6
LE
4006 }
4007 }
4008
02b91b55
LE
4009 /* explicit verify finished notification, stop sector reached. */
4010 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4011 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
b30ab791
AG
4012 ov_out_of_sync_print(device);
4013 drbd_resync_finished(device);
58ffa580 4014 return 0;
02b91b55
LE
4015 }
4016
e9ef7bb6
LE
4017 /* peer says his disk is inconsistent, while we think it is uptodate,
4018 * and this happens while the peer still thinks we have a sync going on,
4019 * but we think we are already done with the sync.
4020 * We ignore this to avoid flapping pdsk.
4021 * This should not happen, if the peer is a recent version of drbd. */
4022 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4023 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4024 real_peer_disk = D_UP_TO_DATE;
4025
4ac4aada
LE
4026 if (ns.conn == C_WF_REPORT_PARAMS)
4027 ns.conn = C_CONNECTED;
b411b363 4028
67531718
PR
4029 if (peer_state.conn == C_AHEAD)
4030 ns.conn = C_BEHIND;
4031
b30ab791
AG
4032 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4033 get_ldev_if_state(device, D_NEGOTIATING)) {
b411b363
PR
4034 int cr; /* consider resync */
4035
4036 /* if we established a new connection */
4ac4aada 4037 cr = (os.conn < C_CONNECTED);
b411b363
PR
4038 /* if we had an established connection
4039 * and one of the nodes newly attaches a disk */
4ac4aada 4040 cr |= (os.conn == C_CONNECTED &&
b411b363 4041 (peer_state.disk == D_NEGOTIATING ||
4ac4aada 4042 os.disk == D_NEGOTIATING));
b411b363
PR
4043 /* if we have both been inconsistent, and the peer has been
4044 * forced to be UpToDate with --overwrite-data */
b30ab791 4045 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
b411b363
PR
4046 /* if we had been plain connected, and the admin requested to
4047 * start a sync by "invalidate" or "invalidate-remote" */
4ac4aada 4048 cr |= (os.conn == C_CONNECTED &&
b411b363
PR
4049 (peer_state.conn >= C_STARTING_SYNC_S &&
4050 peer_state.conn <= C_WF_BITMAP_T));
4051
4052 if (cr)
69a22773 4053 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
b411b363 4054
b30ab791 4055 put_ldev(device);
4ac4aada
LE
4056 if (ns.conn == C_MASK) {
4057 ns.conn = C_CONNECTED;
b30ab791
AG
4058 if (device->state.disk == D_NEGOTIATING) {
4059 drbd_force_state(device, NS(disk, D_FAILED));
b411b363 4060 } else if (peer_state.disk == D_NEGOTIATING) {
d0180171 4061 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
b411b363 4062 peer_state.disk = D_DISKLESS;
580b9767 4063 real_peer_disk = D_DISKLESS;
b411b363 4064 } else {
9f4fe9ad 4065 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
82bc0194 4066 return -EIO;
0b0ba1ef 4067 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
9f4fe9ad 4068 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 4069 return -EIO;
b411b363
PR
4070 }
4071 }
4072 }
4073
0500813f 4074 spin_lock_irq(&device->resource->req_lock);
b30ab791 4075 if (os.i != drbd_read_state(device).i)
b411b363 4076 goto retry;
b30ab791 4077 clear_bit(CONSIDER_RESYNC, &device->flags);
b411b363
PR
4078 ns.peer = peer_state.role;
4079 ns.pdsk = real_peer_disk;
4080 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
4ac4aada 4081 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
b30ab791 4082 ns.disk = device->new_state_tmp.disk;
4ac4aada 4083 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
b30ab791
AG
4084 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4085 test_bit(NEW_CUR_UUID, &device->flags)) {
8554df1c 4086 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
481c6f50 4087 for temporal network outages! */
0500813f 4088 spin_unlock_irq(&device->resource->req_lock);
d0180171 4089 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
9f4fe9ad 4090 tl_clear(peer_device->connection);
b30ab791
AG
4091 drbd_uuid_new_current(device);
4092 clear_bit(NEW_CUR_UUID, &device->flags);
9f4fe9ad 4093 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
82bc0194 4094 return -EIO;
481c6f50 4095 }
b30ab791
AG
4096 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4097 ns = drbd_read_state(device);
0500813f 4098 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4099
4100 if (rv < SS_SUCCESS) {
9f4fe9ad 4101 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 4102 return -EIO;
b411b363
PR
4103 }
4104
4ac4aada
LE
4105 if (os.conn > C_WF_REPORT_PARAMS) {
4106 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
b411b363
PR
4107 peer_state.disk != D_NEGOTIATING ) {
4108 /* we want resync, peer has not yet decided to sync... */
4109 /* Nowadays only used when forcing a node into primary role and
4110 setting its disk to UpToDate with that */
69a22773
AG
4111 drbd_send_uuids(peer_device);
4112 drbd_send_current_state(peer_device);
b411b363
PR
4113 }
4114 }
4115
b30ab791 4116 clear_bit(DISCARD_MY_DATA, &device->flags);
b411b363 4117
b30ab791 4118 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
b411b363 4119
82bc0194 4120 return 0;
b411b363
PR
4121}
4122
bde89a9e 4123static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4124{
9f4fe9ad 4125 struct drbd_peer_device *peer_device;
b30ab791 4126 struct drbd_device *device;
e658983a 4127 struct p_rs_uuid *p = pi->data;
4a76b161 4128
9f4fe9ad
AG
4129 peer_device = conn_peer_device(connection, pi->vnr);
4130 if (!peer_device)
4a76b161 4131 return -EIO;
9f4fe9ad 4132 device = peer_device->device;
b411b363 4133
b30ab791
AG
4134 wait_event(device->misc_wait,
4135 device->state.conn == C_WF_SYNC_UUID ||
4136 device->state.conn == C_BEHIND ||
4137 device->state.conn < C_CONNECTED ||
4138 device->state.disk < D_NEGOTIATING);
b411b363 4139
0b0ba1ef 4140 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
b411b363 4141
b411b363
PR
4142 /* Here the _drbd_uuid_ functions are right, current should
4143 _not_ be rotated into the history */
b30ab791
AG
4144 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4145 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4146 _drbd_uuid_set(device, UI_BITMAP, 0UL);
b411b363 4147
b30ab791
AG
4148 drbd_print_uuids(device, "updated sync uuid");
4149 drbd_start_resync(device, C_SYNC_TARGET);
b411b363 4150
b30ab791 4151 put_ldev(device);
b411b363 4152 } else
d0180171 4153 drbd_err(device, "Ignoring SyncUUID packet!\n");
b411b363 4154
82bc0194 4155 return 0;
b411b363
PR
4156}
4157
2c46407d
AG
4158/**
4159 * receive_bitmap_plain
4160 *
4161 * Return 0 when done, 1 when another iteration is needed, and a negative error
4162 * code upon failure.
4163 */
4164static int
69a22773 4165receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
e658983a 4166 unsigned long *p, struct bm_xfer_ctx *c)
b411b363 4167{
50d0b1ad 4168 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
69a22773 4169 drbd_header_size(peer_device->connection);
e658983a 4170 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
50d0b1ad 4171 c->bm_words - c->word_offset);
e658983a 4172 unsigned int want = num_words * sizeof(*p);
2c46407d 4173 int err;
b411b363 4174
50d0b1ad 4175 if (want != size) {
69a22773 4176 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
2c46407d 4177 return -EIO;
b411b363
PR
4178 }
4179 if (want == 0)
2c46407d 4180 return 0;
69a22773 4181 err = drbd_recv_all(peer_device->connection, p, want);
82bc0194 4182 if (err)
2c46407d 4183 return err;
b411b363 4184
69a22773 4185 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
b411b363
PR
4186
4187 c->word_offset += num_words;
4188 c->bit_offset = c->word_offset * BITS_PER_LONG;
4189 if (c->bit_offset > c->bm_bits)
4190 c->bit_offset = c->bm_bits;
4191
2c46407d 4192 return 1;
b411b363
PR
4193}
4194
a02d1240
AG
4195static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4196{
4197 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4198}
4199
4200static int dcbp_get_start(struct p_compressed_bm *p)
4201{
4202 return (p->encoding & 0x80) != 0;
4203}
4204
4205static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4206{
4207 return (p->encoding >> 4) & 0x7;
4208}
4209
2c46407d
AG
4210/**
4211 * recv_bm_rle_bits
4212 *
4213 * Return 0 when done, 1 when another iteration is needed, and a negative error
4214 * code upon failure.
4215 */
4216static int
69a22773 4217recv_bm_rle_bits(struct drbd_peer_device *peer_device,
b411b363 4218 struct p_compressed_bm *p,
c6d25cfe
PR
4219 struct bm_xfer_ctx *c,
4220 unsigned int len)
b411b363
PR
4221{
4222 struct bitstream bs;
4223 u64 look_ahead;
4224 u64 rl;
4225 u64 tmp;
4226 unsigned long s = c->bit_offset;
4227 unsigned long e;
a02d1240 4228 int toggle = dcbp_get_start(p);
b411b363
PR
4229 int have;
4230 int bits;
4231
a02d1240 4232 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
b411b363
PR
4233
4234 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4235 if (bits < 0)
2c46407d 4236 return -EIO;
b411b363
PR
4237
4238 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4239 bits = vli_decode_bits(&rl, look_ahead);
4240 if (bits <= 0)
2c46407d 4241 return -EIO;
b411b363
PR
4242
4243 if (toggle) {
4244 e = s + rl -1;
4245 if (e >= c->bm_bits) {
69a22773 4246 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
2c46407d 4247 return -EIO;
b411b363 4248 }
69a22773 4249 _drbd_bm_set_bits(peer_device->device, s, e);
b411b363
PR
4250 }
4251
4252 if (have < bits) {
69a22773 4253 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
b411b363
PR
4254 have, bits, look_ahead,
4255 (unsigned int)(bs.cur.b - p->code),
4256 (unsigned int)bs.buf_len);
2c46407d 4257 return -EIO;
b411b363 4258 }
d2da5b0c
LE
4259 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4260 if (likely(bits < 64))
4261 look_ahead >>= bits;
4262 else
4263 look_ahead = 0;
b411b363
PR
4264 have -= bits;
4265
4266 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4267 if (bits < 0)
2c46407d 4268 return -EIO;
b411b363
PR
4269 look_ahead |= tmp << have;
4270 have += bits;
4271 }
4272
4273 c->bit_offset = s;
4274 bm_xfer_ctx_bit_to_word_offset(c);
4275
2c46407d 4276 return (s != c->bm_bits);
b411b363
PR
4277}
4278
2c46407d
AG
4279/**
4280 * decode_bitmap_c
4281 *
4282 * Return 0 when done, 1 when another iteration is needed, and a negative error
4283 * code upon failure.
4284 */
4285static int
69a22773 4286decode_bitmap_c(struct drbd_peer_device *peer_device,
b411b363 4287 struct p_compressed_bm *p,
c6d25cfe
PR
4288 struct bm_xfer_ctx *c,
4289 unsigned int len)
b411b363 4290{
a02d1240 4291 if (dcbp_get_code(p) == RLE_VLI_Bits)
69a22773 4292 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
b411b363
PR
4293
4294 /* other variants had been implemented for evaluation,
4295 * but have been dropped as this one turned out to be "best"
4296 * during all our tests. */
4297
69a22773
AG
4298 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4299 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
2c46407d 4300 return -EIO;
b411b363
PR
4301}
4302
b30ab791 4303void INFO_bm_xfer_stats(struct drbd_device *device,
b411b363
PR
4304 const char *direction, struct bm_xfer_ctx *c)
4305{
4306 /* what would it take to transfer it "plaintext" */
a6b32bc3 4307 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
50d0b1ad
AG
4308 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4309 unsigned int plain =
4310 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4311 c->bm_words * sizeof(unsigned long);
4312 unsigned int total = c->bytes[0] + c->bytes[1];
4313 unsigned int r;
b411b363
PR
4314
4315 /* total can not be zero. but just in case: */
4316 if (total == 0)
4317 return;
4318
4319 /* don't report if not compressed */
4320 if (total >= plain)
4321 return;
4322
4323 /* total < plain. check for overflow, still */
4324 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4325 : (1000 * total / plain);
4326
4327 if (r > 1000)
4328 r = 1000;
4329
4330 r = 1000 - r;
d0180171 4331 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
b411b363
PR
4332 "total %u; compression: %u.%u%%\n",
4333 direction,
4334 c->bytes[1], c->packets[1],
4335 c->bytes[0], c->packets[0],
4336 total, r/10, r % 10);
4337}
4338
4339/* Since we are processing the bitfield from lower addresses to higher,
4340 it does not matter if the process it in 32 bit chunks or 64 bit
4341 chunks as long as it is little endian. (Understand it as byte stream,
4342 beginning with the lowest byte...) If we would use big endian
4343 we would need to process it from the highest address to the lowest,
4344 in order to be agnostic to the 32 vs 64 bits issue.
4345
4346 returns 0 on failure, 1 if we successfully received it. */
bde89a9e 4347static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4348{
9f4fe9ad 4349 struct drbd_peer_device *peer_device;
b30ab791 4350 struct drbd_device *device;
b411b363 4351 struct bm_xfer_ctx c;
2c46407d 4352 int err;
4a76b161 4353
9f4fe9ad
AG
4354 peer_device = conn_peer_device(connection, pi->vnr);
4355 if (!peer_device)
4a76b161 4356 return -EIO;
9f4fe9ad 4357 device = peer_device->device;
b411b363 4358
b30ab791 4359 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
20ceb2b2
LE
4360 /* you are supposed to send additional out-of-sync information
4361 * if you actually set bits during this phase */
b411b363 4362
b411b363 4363 c = (struct bm_xfer_ctx) {
b30ab791
AG
4364 .bm_bits = drbd_bm_bits(device),
4365 .bm_words = drbd_bm_words(device),
b411b363
PR
4366 };
4367
2c46407d 4368 for(;;) {
e658983a 4369 if (pi->cmd == P_BITMAP)
69a22773 4370 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
e658983a 4371 else if (pi->cmd == P_COMPRESSED_BITMAP) {
b411b363
PR
4372 /* MAYBE: sanity check that we speak proto >= 90,
4373 * and the feature is enabled! */
e658983a 4374 struct p_compressed_bm *p = pi->data;
b411b363 4375
bde89a9e 4376 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
d0180171 4377 drbd_err(device, "ReportCBitmap packet too large\n");
82bc0194 4378 err = -EIO;
b411b363
PR
4379 goto out;
4380 }
e658983a 4381 if (pi->size <= sizeof(*p)) {
d0180171 4382 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
82bc0194 4383 err = -EIO;
78fcbdae 4384 goto out;
b411b363 4385 }
9f4fe9ad 4386 err = drbd_recv_all(peer_device->connection, p, pi->size);
e658983a
AG
4387 if (err)
4388 goto out;
69a22773 4389 err = decode_bitmap_c(peer_device, p, &c, pi->size);
b411b363 4390 } else {
d0180171 4391 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
82bc0194 4392 err = -EIO;
b411b363
PR
4393 goto out;
4394 }
4395
e2857216 4396 c.packets[pi->cmd == P_BITMAP]++;
bde89a9e 4397 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
b411b363 4398
2c46407d
AG
4399 if (err <= 0) {
4400 if (err < 0)
4401 goto out;
b411b363 4402 break;
2c46407d 4403 }
9f4fe9ad 4404 err = drbd_recv_header(peer_device->connection, pi);
82bc0194 4405 if (err)
b411b363 4406 goto out;
2c46407d 4407 }
b411b363 4408
b30ab791 4409 INFO_bm_xfer_stats(device, "receive", &c);
b411b363 4410
b30ab791 4411 if (device->state.conn == C_WF_BITMAP_T) {
de1f8e4a
AG
4412 enum drbd_state_rv rv;
4413
b30ab791 4414 err = drbd_send_bitmap(device);
82bc0194 4415 if (err)
b411b363
PR
4416 goto out;
4417 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
b30ab791 4418 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
0b0ba1ef 4419 D_ASSERT(device, rv == SS_SUCCESS);
b30ab791 4420 } else if (device->state.conn != C_WF_BITMAP_S) {
b411b363
PR
4421 /* admin may have requested C_DISCONNECTING,
4422 * other threads may have noticed network errors */
d0180171 4423 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
b30ab791 4424 drbd_conn_str(device->state.conn));
b411b363 4425 }
82bc0194 4426 err = 0;
b411b363 4427
b411b363 4428 out:
b30ab791
AG
4429 drbd_bm_unlock(device);
4430 if (!err && device->state.conn == C_WF_BITMAP_S)
4431 drbd_start_resync(device, C_SYNC_SOURCE);
82bc0194 4432 return err;
b411b363
PR
4433}
4434
bde89a9e 4435static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4436{
1ec861eb 4437 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
e2857216 4438 pi->cmd, pi->size);
b411b363 4439
bde89a9e 4440 return ignore_remaining_packet(connection, pi);
b411b363
PR
4441}
4442
bde89a9e 4443static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 4444{
e7f52dfb
LE
4445 /* Make sure we've acked all the TCP data associated
4446 * with the data requests being unplugged */
bde89a9e 4447 drbd_tcp_quickack(connection->data.socket);
0ced55a3 4448
82bc0194 4449 return 0;
0ced55a3
PR
4450}
4451
bde89a9e 4452static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
73a01a18 4453{
9f4fe9ad 4454 struct drbd_peer_device *peer_device;
b30ab791 4455 struct drbd_device *device;
e658983a 4456 struct p_block_desc *p = pi->data;
4a76b161 4457
9f4fe9ad
AG
4458 peer_device = conn_peer_device(connection, pi->vnr);
4459 if (!peer_device)
4a76b161 4460 return -EIO;
9f4fe9ad 4461 device = peer_device->device;
73a01a18 4462
b30ab791 4463 switch (device->state.conn) {
f735e363
LE
4464 case C_WF_SYNC_UUID:
4465 case C_WF_BITMAP_T:
4466 case C_BEHIND:
4467 break;
4468 default:
d0180171 4469 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
b30ab791 4470 drbd_conn_str(device->state.conn));
f735e363
LE
4471 }
4472
b30ab791 4473 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
73a01a18 4474
82bc0194 4475 return 0;
73a01a18
PR
4476}
4477
02918be2
PR
4478struct data_cmd {
4479 int expect_payload;
4480 size_t pkt_size;
bde89a9e 4481 int (*fn)(struct drbd_connection *, struct packet_info *);
02918be2
PR
4482};
4483
4484static struct data_cmd drbd_cmd_handler[] = {
4485 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4486 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4487 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4488 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4489 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4490 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4491 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
02918be2
PR
4492 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4493 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4494 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4495 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
02918be2
PR
4496 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4497 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4498 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4499 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4500 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4501 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4502 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4503 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4504 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4505 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
73a01a18 4506 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4a76b161 4507 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4508 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
a0fb3c47 4509 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
b411b363
PR
4510};
4511
bde89a9e 4512static void drbdd(struct drbd_connection *connection)
b411b363 4513{
77351055 4514 struct packet_info pi;
02918be2 4515 size_t shs; /* sub header size */
82bc0194 4516 int err;
b411b363 4517
bde89a9e 4518 while (get_t_state(&connection->receiver) == RUNNING) {
deebe195 4519 struct data_cmd *cmd;
b411b363 4520
bde89a9e
AG
4521 drbd_thread_current_set_cpu(&connection->receiver);
4522 if (drbd_recv_header(connection, &pi))
02918be2 4523 goto err_out;
b411b363 4524
deebe195 4525 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4526 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
1ec861eb 4527 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
2fcb8f30 4528 cmdname(pi.cmd), pi.cmd);
02918be2 4529 goto err_out;
0b33a916 4530 }
b411b363 4531
e658983a
AG
4532 shs = cmd->pkt_size;
4533 if (pi.size > shs && !cmd->expect_payload) {
1ec861eb 4534 drbd_err(connection, "No payload expected %s l:%d\n",
2fcb8f30 4535 cmdname(pi.cmd), pi.size);
02918be2 4536 goto err_out;
b411b363 4537 }
b411b363 4538
c13f7e1a 4539 if (shs) {
bde89a9e 4540 err = drbd_recv_all_warn(connection, pi.data, shs);
a5c31904 4541 if (err)
c13f7e1a 4542 goto err_out;
e2857216 4543 pi.size -= shs;
c13f7e1a
LE
4544 }
4545
bde89a9e 4546 err = cmd->fn(connection, &pi);
4a76b161 4547 if (err) {
1ec861eb 4548 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
9f5bdc33 4549 cmdname(pi.cmd), err, pi.size);
02918be2 4550 goto err_out;
b411b363
PR
4551 }
4552 }
82bc0194 4553 return;
b411b363 4554
82bc0194 4555 err_out:
bde89a9e 4556 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4557}
4558
bde89a9e 4559static void conn_disconnect(struct drbd_connection *connection)
b411b363 4560{
c06ece6b 4561 struct drbd_peer_device *peer_device;
bbeb641c 4562 enum drbd_conns oc;
376694a0 4563 int vnr;
b411b363 4564
bde89a9e 4565 if (connection->cstate == C_STANDALONE)
b411b363 4566 return;
b411b363 4567
545752d5
LE
4568 /* We are about to start the cleanup after connection loss.
4569 * Make sure drbd_make_request knows about that.
4570 * Usually we should be in some network failure state already,
4571 * but just in case we are not, we fix it up here.
4572 */
bde89a9e 4573 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
545752d5 4574
b411b363 4575 /* asender does not clean up anything. it must not interfere, either */
bde89a9e
AG
4576 drbd_thread_stop(&connection->asender);
4577 drbd_free_sock(connection);
360cc740 4578
c141ebda 4579 rcu_read_lock();
c06ece6b
AG
4580 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4581 struct drbd_device *device = peer_device->device;
b30ab791 4582 kref_get(&device->kref);
c141ebda 4583 rcu_read_unlock();
69a22773 4584 drbd_disconnected(peer_device);
c06ece6b 4585 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
4586 rcu_read_lock();
4587 }
4588 rcu_read_unlock();
4589
bde89a9e 4590 if (!list_empty(&connection->current_epoch->list))
1ec861eb 4591 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
12038a3a 4592 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
bde89a9e
AG
4593 atomic_set(&connection->current_epoch->epoch_size, 0);
4594 connection->send.seen_any_write_yet = false;
12038a3a 4595
1ec861eb 4596 drbd_info(connection, "Connection closed\n");
360cc740 4597
bde89a9e
AG
4598 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4599 conn_try_outdate_peer_async(connection);
cb703454 4600
0500813f 4601 spin_lock_irq(&connection->resource->req_lock);
bde89a9e 4602 oc = connection->cstate;
bbeb641c 4603 if (oc >= C_UNCONNECTED)
bde89a9e 4604 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4605
0500813f 4606 spin_unlock_irq(&connection->resource->req_lock);
360cc740 4607
f3dfa40a 4608 if (oc == C_DISCONNECTING)
bde89a9e 4609 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4610}
4611
69a22773 4612static int drbd_disconnected(struct drbd_peer_device *peer_device)
360cc740 4613{
69a22773 4614 struct drbd_device *device = peer_device->device;
360cc740 4615 unsigned int i;
b411b363 4616
85719573 4617 /* wait for current activity to cease. */
0500813f 4618 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
4619 _drbd_wait_ee_list_empty(device, &device->active_ee);
4620 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4621 _drbd_wait_ee_list_empty(device, &device->read_ee);
0500813f 4622 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4623
4624 /* We do not have data structures that would allow us to
4625 * get the rs_pending_cnt down to 0 again.
4626 * * On C_SYNC_TARGET we do not have any data structures describing
4627 * the pending RSDataRequest's we have sent.
4628 * * On C_SYNC_SOURCE there is no data structure that tracks
4629 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4630 * And no, it is not the sum of the reference counts in the
4631 * resync_LRU. The resync_LRU tracks the whole operation including
4632 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4633 * on the fly. */
b30ab791
AG
4634 drbd_rs_cancel_all(device);
4635 device->rs_total = 0;
4636 device->rs_failed = 0;
4637 atomic_set(&device->rs_pending_cnt, 0);
4638 wake_up(&device->misc_wait);
b411b363 4639
b30ab791
AG
4640 del_timer_sync(&device->resync_timer);
4641 resync_timer_fn((unsigned long)device);
b411b363 4642
b411b363
PR
4643 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4644 * w_make_resync_request etc. which may still be on the worker queue
4645 * to be "canceled" */
b5043c5e 4646 drbd_flush_workqueue(&peer_device->connection->sender_work);
b411b363 4647
b30ab791 4648 drbd_finish_peer_reqs(device);
b411b363 4649
d10b4ea3
PR
4650 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4651 might have issued a work again. The one before drbd_finish_peer_reqs() is
4652 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
b5043c5e 4653 drbd_flush_workqueue(&peer_device->connection->sender_work);
d10b4ea3 4654
08332d73
LE
4655 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4656 * again via drbd_try_clear_on_disk_bm(). */
b30ab791 4657 drbd_rs_cancel_all(device);
b411b363 4658
b30ab791
AG
4659 kfree(device->p_uuid);
4660 device->p_uuid = NULL;
b411b363 4661
b30ab791 4662 if (!drbd_suspended(device))
69a22773 4663 tl_clear(peer_device->connection);
b411b363 4664
b30ab791 4665 drbd_md_sync(device);
b411b363 4666
20ceb2b2
LE
4667 /* serialize with bitmap writeout triggered by the state change,
4668 * if any. */
b30ab791 4669 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
20ceb2b2 4670
b411b363
PR
4671 /* tcp_close and release of sendpage pages can be deferred. I don't
4672 * want to use SO_LINGER, because apparently it can be deferred for
4673 * more than 20 seconds (longest time I checked).
4674 *
4675 * Actually we don't care for exactly when the network stack does its
4676 * put_page(), but release our reference on these pages right here.
4677 */
b30ab791 4678 i = drbd_free_peer_reqs(device, &device->net_ee);
b411b363 4679 if (i)
d0180171 4680 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
b30ab791 4681 i = atomic_read(&device->pp_in_use_by_net);
435f0740 4682 if (i)
d0180171 4683 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
b30ab791 4684 i = atomic_read(&device->pp_in_use);
b411b363 4685 if (i)
d0180171 4686 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
b411b363 4687
0b0ba1ef
AG
4688 D_ASSERT(device, list_empty(&device->read_ee));
4689 D_ASSERT(device, list_empty(&device->active_ee));
4690 D_ASSERT(device, list_empty(&device->sync_ee));
4691 D_ASSERT(device, list_empty(&device->done_ee));
b411b363 4692
360cc740 4693 return 0;
b411b363
PR
4694}
4695
4696/*
4697 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4698 * we can agree on is stored in agreed_pro_version.
4699 *
4700 * feature flags and the reserved array should be enough room for future
4701 * enhancements of the handshake protocol, and possible plugins...
4702 *
4703 * for now, they are expected to be zero, but ignored.
4704 */
bde89a9e 4705static int drbd_send_features(struct drbd_connection *connection)
b411b363 4706{
9f5bdc33
AG
4707 struct drbd_socket *sock;
4708 struct p_connection_features *p;
b411b363 4709
bde89a9e
AG
4710 sock = &connection->data;
4711 p = conn_prepare_command(connection, sock);
9f5bdc33 4712 if (!p)
e8d17b01 4713 return -EIO;
b411b363
PR
4714 memset(p, 0, sizeof(*p));
4715 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4716 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
20c68fde 4717 p->feature_flags = cpu_to_be32(PRO_FEATURES);
bde89a9e 4718 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4719}
4720
4721/*
4722 * return values:
4723 * 1 yes, we have a valid connection
4724 * 0 oops, did not work out, please try again
4725 * -1 peer talks different language,
4726 * no point in trying again, please go standalone.
4727 */
bde89a9e 4728static int drbd_do_features(struct drbd_connection *connection)
b411b363 4729{
bde89a9e 4730 /* ASSERT current == connection->receiver ... */
e658983a
AG
4731 struct p_connection_features *p;
4732 const int expect = sizeof(struct p_connection_features);
77351055 4733 struct packet_info pi;
a5c31904 4734 int err;
b411b363 4735
bde89a9e 4736 err = drbd_send_features(connection);
e8d17b01 4737 if (err)
b411b363
PR
4738 return 0;
4739
bde89a9e 4740 err = drbd_recv_header(connection, &pi);
69bc7bc3 4741 if (err)
b411b363
PR
4742 return 0;
4743
6038178e 4744 if (pi.cmd != P_CONNECTION_FEATURES) {
1ec861eb 4745 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4746 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4747 return -1;
4748 }
4749
77351055 4750 if (pi.size != expect) {
1ec861eb 4751 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4752 expect, pi.size);
b411b363
PR
4753 return -1;
4754 }
4755
e658983a 4756 p = pi.data;
bde89a9e 4757 err = drbd_recv_all_warn(connection, p, expect);
a5c31904 4758 if (err)
b411b363 4759 return 0;
b411b363 4760
b411b363
PR
4761 p->protocol_min = be32_to_cpu(p->protocol_min);
4762 p->protocol_max = be32_to_cpu(p->protocol_max);
4763 if (p->protocol_max == 0)
4764 p->protocol_max = p->protocol_min;
4765
4766 if (PRO_VERSION_MAX < p->protocol_min ||
4767 PRO_VERSION_MIN > p->protocol_max)
4768 goto incompat;
4769
bde89a9e 4770 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
20c68fde 4771 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
b411b363 4772
1ec861eb 4773 drbd_info(connection, "Handshake successful: "
bde89a9e 4774 "Agreed network protocol version %d\n", connection->agreed_pro_version);
b411b363 4775
20c68fde
LE
4776 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4777 connection->agreed_features & FF_TRIM ? " " : " not ");
4778
b411b363
PR
4779 return 1;
4780
4781 incompat:
1ec861eb 4782 drbd_err(connection, "incompatible DRBD dialects: "
b411b363
PR
4783 "I support %d-%d, peer supports %d-%d\n",
4784 PRO_VERSION_MIN, PRO_VERSION_MAX,
4785 p->protocol_min, p->protocol_max);
4786 return -1;
4787}
4788
4789#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
bde89a9e 4790static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4791{
1ec861eb
AG
4792 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4793 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4794 return -1;
b411b363
PR
4795}
4796#else
4797#define CHALLENGE_LEN 64
b10d96cb
JT
4798
4799/* Return value:
4800 1 - auth succeeded,
4801 0 - failed, try again (network error),
4802 -1 - auth failed, don't try again.
4803*/
4804
bde89a9e 4805static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4806{
9f5bdc33 4807 struct drbd_socket *sock;
b411b363
PR
4808 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4809 struct scatterlist sg;
4810 char *response = NULL;
4811 char *right_response = NULL;
4812 char *peers_ch = NULL;
44ed167d
PR
4813 unsigned int key_len;
4814 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4815 unsigned int resp_size;
4816 struct hash_desc desc;
77351055 4817 struct packet_info pi;
44ed167d 4818 struct net_conf *nc;
69bc7bc3 4819 int err, rv;
b411b363 4820
9f5bdc33 4821 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
b411b363 4822
44ed167d 4823 rcu_read_lock();
bde89a9e 4824 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
4825 key_len = strlen(nc->shared_secret);
4826 memcpy(secret, nc->shared_secret, key_len);
4827 rcu_read_unlock();
4828
bde89a9e 4829 desc.tfm = connection->cram_hmac_tfm;
b411b363
PR
4830 desc.flags = 0;
4831
bde89a9e 4832 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4833 if (rv) {
1ec861eb 4834 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4835 rv = -1;
b411b363
PR
4836 goto fail;
4837 }
4838
4839 get_random_bytes(my_challenge, CHALLENGE_LEN);
4840
bde89a9e
AG
4841 sock = &connection->data;
4842 if (!conn_prepare_command(connection, sock)) {
9f5bdc33
AG
4843 rv = 0;
4844 goto fail;
4845 }
bde89a9e 4846 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4847 my_challenge, CHALLENGE_LEN);
b411b363
PR
4848 if (!rv)
4849 goto fail;
4850
bde89a9e 4851 err = drbd_recv_header(connection, &pi);
69bc7bc3
AG
4852 if (err) {
4853 rv = 0;
b411b363 4854 goto fail;
69bc7bc3 4855 }
b411b363 4856
77351055 4857 if (pi.cmd != P_AUTH_CHALLENGE) {
1ec861eb 4858 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4859 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4860 rv = 0;
4861 goto fail;
4862 }
4863
77351055 4864 if (pi.size > CHALLENGE_LEN * 2) {
1ec861eb 4865 drbd_err(connection, "expected AuthChallenge payload too big.\n");
b10d96cb 4866 rv = -1;
b411b363
PR
4867 goto fail;
4868 }
4869
67cca286
PR
4870 if (pi.size < CHALLENGE_LEN) {
4871 drbd_err(connection, "AuthChallenge payload too small.\n");
4872 rv = -1;
4873 goto fail;
4874 }
4875
77351055 4876 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4877 if (peers_ch == NULL) {
1ec861eb 4878 drbd_err(connection, "kmalloc of peers_ch failed\n");
b10d96cb 4879 rv = -1;
b411b363
PR
4880 goto fail;
4881 }
4882
bde89a9e 4883 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
a5c31904 4884 if (err) {
b411b363
PR
4885 rv = 0;
4886 goto fail;
4887 }
4888
67cca286
PR
4889 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4890 drbd_err(connection, "Peer presented the same challenge!\n");
4891 rv = -1;
4892 goto fail;
4893 }
4894
bde89a9e 4895 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
b411b363
PR
4896 response = kmalloc(resp_size, GFP_NOIO);
4897 if (response == NULL) {
1ec861eb 4898 drbd_err(connection, "kmalloc of response failed\n");
b10d96cb 4899 rv = -1;
b411b363
PR
4900 goto fail;
4901 }
4902
4903 sg_init_table(&sg, 1);
77351055 4904 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4905
4906 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4907 if (rv) {
1ec861eb 4908 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4909 rv = -1;
b411b363
PR
4910 goto fail;
4911 }
4912
bde89a9e 4913 if (!conn_prepare_command(connection, sock)) {
9f5bdc33 4914 rv = 0;
b411b363 4915 goto fail;
9f5bdc33 4916 }
bde89a9e 4917 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4918 response, resp_size);
b411b363
PR
4919 if (!rv)
4920 goto fail;
4921
bde89a9e 4922 err = drbd_recv_header(connection, &pi);
69bc7bc3 4923 if (err) {
b411b363
PR
4924 rv = 0;
4925 goto fail;
4926 }
4927
77351055 4928 if (pi.cmd != P_AUTH_RESPONSE) {
1ec861eb 4929 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4930 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4931 rv = 0;
4932 goto fail;
4933 }
4934
77351055 4935 if (pi.size != resp_size) {
1ec861eb 4936 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
b411b363
PR
4937 rv = 0;
4938 goto fail;
4939 }
b411b363 4940
bde89a9e 4941 err = drbd_recv_all_warn(connection, response , resp_size);
a5c31904 4942 if (err) {
b411b363
PR
4943 rv = 0;
4944 goto fail;
4945 }
4946
4947 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4948 if (right_response == NULL) {
1ec861eb 4949 drbd_err(connection, "kmalloc of right_response failed\n");
b10d96cb 4950 rv = -1;
b411b363
PR
4951 goto fail;
4952 }
4953
4954 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4955
4956 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4957 if (rv) {
1ec861eb 4958 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4959 rv = -1;
b411b363
PR
4960 goto fail;
4961 }
4962
4963 rv = !memcmp(response, right_response, resp_size);
4964
4965 if (rv)
1ec861eb 4966 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
44ed167d 4967 resp_size);
b10d96cb
JT
4968 else
4969 rv = -1;
b411b363
PR
4970
4971 fail:
4972 kfree(peers_ch);
4973 kfree(response);
4974 kfree(right_response);
4975
4976 return rv;
4977}
4978#endif
4979
8fe60551 4980int drbd_receiver(struct drbd_thread *thi)
b411b363 4981{
bde89a9e 4982 struct drbd_connection *connection = thi->connection;
b411b363
PR
4983 int h;
4984
1ec861eb 4985 drbd_info(connection, "receiver (re)started\n");
b411b363
PR
4986
4987 do {
bde89a9e 4988 h = conn_connect(connection);
b411b363 4989 if (h == 0) {
bde89a9e 4990 conn_disconnect(connection);
20ee6390 4991 schedule_timeout_interruptible(HZ);
b411b363
PR
4992 }
4993 if (h == -1) {
1ec861eb 4994 drbd_warn(connection, "Discarding network configuration.\n");
bde89a9e 4995 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4996 }
4997 } while (h == 0);
4998
91fd4dad 4999 if (h > 0)
bde89a9e 5000 drbdd(connection);
b411b363 5001
bde89a9e 5002 conn_disconnect(connection);
b411b363 5003
1ec861eb 5004 drbd_info(connection, "receiver terminated\n");
b411b363
PR
5005 return 0;
5006}
5007
5008/* ********* acknowledge sender ******** */
5009
bde89a9e 5010static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5011{
e658983a 5012 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
5013 int retcode = be32_to_cpu(p->retcode);
5014
5015 if (retcode >= SS_SUCCESS) {
bde89a9e 5016 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
e4f78ede 5017 } else {
bde89a9e 5018 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
1ec861eb 5019 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
e4f78ede
PR
5020 drbd_set_st_err_str(retcode), retcode);
5021 }
bde89a9e 5022 wake_up(&connection->ping_wait);
e4f78ede 5023
2735a594 5024 return 0;
e4f78ede 5025}
b411b363 5026
bde89a9e 5027static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5028{
9f4fe9ad 5029 struct drbd_peer_device *peer_device;
b30ab791 5030 struct drbd_device *device;
e658983a 5031 struct p_req_state_reply *p = pi->data;
b411b363
PR
5032 int retcode = be32_to_cpu(p->retcode);
5033
9f4fe9ad
AG
5034 peer_device = conn_peer_device(connection, pi->vnr);
5035 if (!peer_device)
2735a594 5036 return -EIO;
9f4fe9ad 5037 device = peer_device->device;
1952e916 5038
bde89a9e 5039 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
0b0ba1ef 5040 D_ASSERT(device, connection->agreed_pro_version < 100);
bde89a9e 5041 return got_conn_RqSReply(connection, pi);
4d0fc3fd
PR
5042 }
5043
b411b363 5044 if (retcode >= SS_SUCCESS) {
b30ab791 5045 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
b411b363 5046 } else {
b30ab791 5047 set_bit(CL_ST_CHG_FAIL, &device->flags);
d0180171 5048 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
e4f78ede 5049 drbd_set_st_err_str(retcode), retcode);
b411b363 5050 }
b30ab791 5051 wake_up(&device->state_wait);
b411b363 5052
2735a594 5053 return 0;
b411b363
PR
5054}
5055
bde89a9e 5056static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5057{
bde89a9e 5058 return drbd_send_ping_ack(connection);
b411b363
PR
5059
5060}
5061
bde89a9e 5062static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363
PR
5063{
5064 /* restore idle timeout */
bde89a9e
AG
5065 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5066 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5067 wake_up(&connection->ping_wait);
b411b363 5068
2735a594 5069 return 0;
b411b363
PR
5070}
5071
bde89a9e 5072static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5073{
9f4fe9ad 5074 struct drbd_peer_device *peer_device;
b30ab791 5075 struct drbd_device *device;
e658983a 5076 struct p_block_ack *p = pi->data;
b411b363
PR
5077 sector_t sector = be64_to_cpu(p->sector);
5078 int blksize = be32_to_cpu(p->blksize);
5079
9f4fe9ad
AG
5080 peer_device = conn_peer_device(connection, pi->vnr);
5081 if (!peer_device)
2735a594 5082 return -EIO;
9f4fe9ad 5083 device = peer_device->device;
1952e916 5084
9f4fe9ad 5085 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
b411b363 5086
69a22773 5087 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5088
b30ab791
AG
5089 if (get_ldev(device)) {
5090 drbd_rs_complete_io(device, sector);
5091 drbd_set_in_sync(device, sector, blksize);
1d53f09e 5092 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
b30ab791
AG
5093 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5094 put_ldev(device);
1d53f09e 5095 }
b30ab791
AG
5096 dec_rs_pending(device);
5097 atomic_add(blksize >> 9, &device->rs_sect_in);
b411b363 5098
2735a594 5099 return 0;
b411b363
PR
5100}
5101
bc9c5c41 5102static int
b30ab791 5103validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
bc9c5c41
AG
5104 struct rb_root *root, const char *func,
5105 enum drbd_req_event what, bool missing_ok)
b411b363
PR
5106{
5107 struct drbd_request *req;
5108 struct bio_and_error m;
5109
0500813f 5110 spin_lock_irq(&device->resource->req_lock);
b30ab791 5111 req = find_request(device, root, id, sector, missing_ok, func);
b411b363 5112 if (unlikely(!req)) {
0500813f 5113 spin_unlock_irq(&device->resource->req_lock);
85997675 5114 return -EIO;
b411b363
PR
5115 }
5116 __req_mod(req, what, &m);
0500813f 5117 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
5118
5119 if (m.bio)
b30ab791 5120 complete_master_bio(device, &m);
85997675 5121 return 0;
b411b363
PR
5122}
5123
bde89a9e 5124static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5125{
9f4fe9ad 5126 struct drbd_peer_device *peer_device;
b30ab791 5127 struct drbd_device *device;
e658983a 5128 struct p_block_ack *p = pi->data;
b411b363
PR
5129 sector_t sector = be64_to_cpu(p->sector);
5130 int blksize = be32_to_cpu(p->blksize);
5131 enum drbd_req_event what;
5132
9f4fe9ad
AG
5133 peer_device = conn_peer_device(connection, pi->vnr);
5134 if (!peer_device)
2735a594 5135 return -EIO;
9f4fe9ad 5136 device = peer_device->device;
1952e916 5137
69a22773 5138 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5139
579b57ed 5140 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5141 drbd_set_in_sync(device, sector, blksize);
5142 dec_rs_pending(device);
2735a594 5143 return 0;
b411b363 5144 }
e05e1e59 5145 switch (pi->cmd) {
b411b363 5146 case P_RS_WRITE_ACK:
8554df1c 5147 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
5148 break;
5149 case P_WRITE_ACK:
8554df1c 5150 what = WRITE_ACKED_BY_PEER;
b411b363
PR
5151 break;
5152 case P_RECV_ACK:
8554df1c 5153 what = RECV_ACKED_BY_PEER;
b411b363 5154 break;
d4dabbe2
LE
5155 case P_SUPERSEDED:
5156 what = CONFLICT_RESOLVED;
b411b363 5157 break;
7be8da07 5158 case P_RETRY_WRITE:
7be8da07 5159 what = POSTPONE_WRITE;
b411b363
PR
5160 break;
5161 default:
2735a594 5162 BUG();
b411b363
PR
5163 }
5164
b30ab791
AG
5165 return validate_req_change_req_state(device, p->block_id, sector,
5166 &device->write_requests, __func__,
2735a594 5167 what, false);
b411b363
PR
5168}
5169
bde89a9e 5170static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5171{
9f4fe9ad 5172 struct drbd_peer_device *peer_device;
b30ab791 5173 struct drbd_device *device;
e658983a 5174 struct p_block_ack *p = pi->data;
b411b363 5175 sector_t sector = be64_to_cpu(p->sector);
2deb8336 5176 int size = be32_to_cpu(p->blksize);
85997675 5177 int err;
b411b363 5178
9f4fe9ad
AG
5179 peer_device = conn_peer_device(connection, pi->vnr);
5180 if (!peer_device)
2735a594 5181 return -EIO;
9f4fe9ad 5182 device = peer_device->device;
b411b363 5183
69a22773 5184 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5185
579b57ed 5186 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5187 dec_rs_pending(device);
5188 drbd_rs_failed_io(device, sector, size);
2735a594 5189 return 0;
b411b363 5190 }
2deb8336 5191
b30ab791
AG
5192 err = validate_req_change_req_state(device, p->block_id, sector,
5193 &device->write_requests, __func__,
303d1448 5194 NEG_ACKED, true);
85997675 5195 if (err) {
c3afd8f5
AG
5196 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5197 The master bio might already be completed, therefore the
5198 request is no longer in the collision hash. */
5199 /* In Protocol B we might already have got a P_RECV_ACK
5200 but then get a P_NEG_ACK afterwards. */
b30ab791 5201 drbd_set_out_of_sync(device, sector, size);
2deb8336 5202 }
2735a594 5203 return 0;
b411b363
PR
5204}
5205
bde89a9e 5206static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5207{
9f4fe9ad 5208 struct drbd_peer_device *peer_device;
b30ab791 5209 struct drbd_device *device;
e658983a 5210 struct p_block_ack *p = pi->data;
b411b363
PR
5211 sector_t sector = be64_to_cpu(p->sector);
5212
9f4fe9ad
AG
5213 peer_device = conn_peer_device(connection, pi->vnr);
5214 if (!peer_device)
2735a594 5215 return -EIO;
9f4fe9ad 5216 device = peer_device->device;
1952e916 5217
69a22773 5218 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
7be8da07 5219
d0180171 5220 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5221 (unsigned long long)sector, be32_to_cpu(p->blksize));
5222
b30ab791
AG
5223 return validate_req_change_req_state(device, p->block_id, sector,
5224 &device->read_requests, __func__,
2735a594 5225 NEG_ACKED, false);
b411b363
PR
5226}
5227
bde89a9e 5228static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5229{
9f4fe9ad 5230 struct drbd_peer_device *peer_device;
b30ab791 5231 struct drbd_device *device;
b411b363
PR
5232 sector_t sector;
5233 int size;
e658983a 5234 struct p_block_ack *p = pi->data;
1952e916 5235
9f4fe9ad
AG
5236 peer_device = conn_peer_device(connection, pi->vnr);
5237 if (!peer_device)
2735a594 5238 return -EIO;
9f4fe9ad 5239 device = peer_device->device;
b411b363
PR
5240
5241 sector = be64_to_cpu(p->sector);
5242 size = be32_to_cpu(p->blksize);
b411b363 5243
69a22773 5244 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5245
b30ab791 5246 dec_rs_pending(device);
b411b363 5247
b30ab791
AG
5248 if (get_ldev_if_state(device, D_FAILED)) {
5249 drbd_rs_complete_io(device, sector);
e05e1e59 5250 switch (pi->cmd) {
d612d309 5251 case P_NEG_RS_DREPLY:
b30ab791 5252 drbd_rs_failed_io(device, sector, size);
d612d309
PR
5253 case P_RS_CANCEL:
5254 break;
5255 default:
2735a594 5256 BUG();
d612d309 5257 }
b30ab791 5258 put_ldev(device);
b411b363
PR
5259 }
5260
2735a594 5261 return 0;
b411b363
PR
5262}
5263
bde89a9e 5264static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5265{
e658983a 5266 struct p_barrier_ack *p = pi->data;
c06ece6b 5267 struct drbd_peer_device *peer_device;
9ed57dcb 5268 int vnr;
1952e916 5269
bde89a9e 5270 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
b411b363 5271
9ed57dcb 5272 rcu_read_lock();
c06ece6b
AG
5273 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5274 struct drbd_device *device = peer_device->device;
5275
b30ab791
AG
5276 if (device->state.conn == C_AHEAD &&
5277 atomic_read(&device->ap_in_flight) == 0 &&
5278 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5279 device->start_resync_timer.expires = jiffies + HZ;
5280 add_timer(&device->start_resync_timer);
9ed57dcb 5281 }
c4752ef1 5282 }
9ed57dcb 5283 rcu_read_unlock();
c4752ef1 5284
2735a594 5285 return 0;
b411b363
PR
5286}
5287
bde89a9e 5288static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5289{
9f4fe9ad 5290 struct drbd_peer_device *peer_device;
b30ab791 5291 struct drbd_device *device;
e658983a 5292 struct p_block_ack *p = pi->data;
84b8c06b 5293 struct drbd_device_work *dw;
b411b363
PR
5294 sector_t sector;
5295 int size;
5296
9f4fe9ad
AG
5297 peer_device = conn_peer_device(connection, pi->vnr);
5298 if (!peer_device)
2735a594 5299 return -EIO;
9f4fe9ad 5300 device = peer_device->device;
1952e916 5301
b411b363
PR
5302 sector = be64_to_cpu(p->sector);
5303 size = be32_to_cpu(p->blksize);
5304
69a22773 5305 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363
PR
5306
5307 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
b30ab791 5308 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 5309 else
b30ab791 5310 ov_out_of_sync_print(device);
b411b363 5311
b30ab791 5312 if (!get_ldev(device))
2735a594 5313 return 0;
1d53f09e 5314
b30ab791
AG
5315 drbd_rs_complete_io(device, sector);
5316 dec_rs_pending(device);
b411b363 5317
b30ab791 5318 --device->ov_left;
ea5442af
LE
5319
5320 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
5321 if ((device->ov_left & 0x200) == 0x200)
5322 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 5323
b30ab791 5324 if (device->ov_left == 0) {
84b8c06b
AG
5325 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5326 if (dw) {
5327 dw->w.cb = w_ov_finished;
5328 dw->device = device;
5329 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
b411b363 5330 } else {
84b8c06b 5331 drbd_err(device, "kmalloc(dw) failed.");
b30ab791
AG
5332 ov_out_of_sync_print(device);
5333 drbd_resync_finished(device);
b411b363
PR
5334 }
5335 }
b30ab791 5336 put_ldev(device);
2735a594 5337 return 0;
b411b363
PR
5338}
5339
bde89a9e 5340static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 5341{
2735a594 5342 return 0;
b411b363
PR
5343}
5344
bde89a9e 5345static int connection_finish_peer_reqs(struct drbd_connection *connection)
0ced55a3 5346{
c06ece6b 5347 struct drbd_peer_device *peer_device;
c141ebda 5348 int vnr, not_empty = 0;
32862ec7
PR
5349
5350 do {
bde89a9e 5351 clear_bit(SIGNAL_ASENDER, &connection->flags);
32862ec7 5352 flush_signals(current);
c141ebda
PR
5353
5354 rcu_read_lock();
c06ece6b
AG
5355 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5356 struct drbd_device *device = peer_device->device;
b30ab791 5357 kref_get(&device->kref);
c141ebda 5358 rcu_read_unlock();
b30ab791 5359 if (drbd_finish_peer_reqs(device)) {
05a10ec7 5360 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5361 return 1;
d3fcb490 5362 }
05a10ec7 5363 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5364 rcu_read_lock();
082a3439 5365 }
bde89a9e 5366 set_bit(SIGNAL_ASENDER, &connection->flags);
082a3439 5367
0500813f 5368 spin_lock_irq(&connection->resource->req_lock);
c06ece6b
AG
5369 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5370 struct drbd_device *device = peer_device->device;
b30ab791 5371 not_empty = !list_empty(&device->done_ee);
082a3439
PR
5372 if (not_empty)
5373 break;
5374 }
0500813f 5375 spin_unlock_irq(&connection->resource->req_lock);
c141ebda 5376 rcu_read_unlock();
32862ec7
PR
5377 } while (not_empty);
5378
5379 return 0;
0ced55a3
PR
5380}
5381
b411b363
PR
5382struct asender_cmd {
5383 size_t pkt_size;
bde89a9e 5384 int (*fn)(struct drbd_connection *connection, struct packet_info *);
b411b363
PR
5385};
5386
7201b972 5387static struct asender_cmd asender_tbl[] = {
e658983a
AG
5388 [P_PING] = { 0, got_Ping },
5389 [P_PING_ACK] = { 0, got_PingAck },
b411b363
PR
5390 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5391 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5392 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
d4dabbe2 5393 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
b411b363
PR
5394 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5395 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
1952e916 5396 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
b411b363
PR
5397 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5398 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5399 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5400 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
02918be2 5401 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
1952e916
AG
5402 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5403 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5404 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972 5405};
b411b363
PR
5406
5407int drbd_asender(struct drbd_thread *thi)
5408{
bde89a9e 5409 struct drbd_connection *connection = thi->connection;
b411b363 5410 struct asender_cmd *cmd = NULL;
77351055 5411 struct packet_info pi;
257d0af6 5412 int rv;
bde89a9e 5413 void *buf = connection->meta.rbuf;
b411b363 5414 int received = 0;
bde89a9e 5415 unsigned int header_size = drbd_header_size(connection);
52b061a4 5416 int expect = header_size;
44ed167d
PR
5417 bool ping_timeout_active = false;
5418 struct net_conf *nc;
bb77d34e 5419 int ping_timeo, tcp_cork, ping_int;
3990e04d 5420 struct sched_param param = { .sched_priority = 2 };
b411b363 5421
3990e04d
PR
5422 rv = sched_setscheduler(current, SCHED_RR, &param);
5423 if (rv < 0)
1ec861eb 5424 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
b411b363 5425
e77a0a5c 5426 while (get_t_state(thi) == RUNNING) {
80822284 5427 drbd_thread_current_set_cpu(thi);
b411b363 5428
44ed167d 5429 rcu_read_lock();
bde89a9e 5430 nc = rcu_dereference(connection->net_conf);
44ed167d 5431 ping_timeo = nc->ping_timeo;
bb77d34e 5432 tcp_cork = nc->tcp_cork;
44ed167d
PR
5433 ping_int = nc->ping_int;
5434 rcu_read_unlock();
5435
bde89a9e
AG
5436 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5437 if (drbd_send_ping(connection)) {
1ec861eb 5438 drbd_err(connection, "drbd_send_ping has failed\n");
b411b363 5439 goto reconnect;
841ce241 5440 }
bde89a9e 5441 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
44ed167d 5442 ping_timeout_active = true;
b411b363
PR
5443 }
5444
32862ec7
PR
5445 /* TODO: conditionally cork; it may hurt latency if we cork without
5446 much to send */
bb77d34e 5447 if (tcp_cork)
bde89a9e
AG
5448 drbd_tcp_cork(connection->meta.socket);
5449 if (connection_finish_peer_reqs(connection)) {
1ec861eb 5450 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
32862ec7 5451 goto reconnect;
b411b363
PR
5452 }
5453 /* but unconditionally uncork unless disabled */
bb77d34e 5454 if (tcp_cork)
bde89a9e 5455 drbd_tcp_uncork(connection->meta.socket);
b411b363
PR
5456
5457 /* short circuit, recv_msg would return EINTR anyways. */
5458 if (signal_pending(current))
5459 continue;
5460
bde89a9e
AG
5461 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5462 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363
PR
5463
5464 flush_signals(current);
5465
5466 /* Note:
5467 * -EINTR (on meta) we got a signal
5468 * -EAGAIN (on meta) rcvtimeo expired
5469 * -ECONNRESET other side closed the connection
5470 * -ERESTARTSYS (on data) we got a signal
5471 * rv < 0 other than above: unexpected error!
5472 * rv == expected: full header or command
5473 * rv < expected: "woken" by signal during receive
5474 * rv == 0 : "connection shut down by peer"
5475 */
5476 if (likely(rv > 0)) {
5477 received += rv;
5478 buf += rv;
5479 } else if (rv == 0) {
bde89a9e 5480 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
5481 long t;
5482 rcu_read_lock();
bde89a9e 5483 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
5484 rcu_read_unlock();
5485
bde89a9e
AG
5486 t = wait_event_timeout(connection->ping_wait,
5487 connection->cstate < C_WF_REPORT_PARAMS,
b66623e3 5488 t);
599377ac
PR
5489 if (t)
5490 break;
5491 }
1ec861eb 5492 drbd_err(connection, "meta connection shut down by peer.\n");
b411b363
PR
5493 goto reconnect;
5494 } else if (rv == -EAGAIN) {
cb6518cb
LE
5495 /* If the data socket received something meanwhile,
5496 * that is good enough: peer is still alive. */
bde89a9e
AG
5497 if (time_after(connection->last_received,
5498 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5499 continue;
f36af18c 5500 if (ping_timeout_active) {
1ec861eb 5501 drbd_err(connection, "PingAck did not arrive in time.\n");
b411b363
PR
5502 goto reconnect;
5503 }
bde89a9e 5504 set_bit(SEND_PING, &connection->flags);
b411b363
PR
5505 continue;
5506 } else if (rv == -EINTR) {
5507 continue;
5508 } else {
1ec861eb 5509 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5510 goto reconnect;
5511 }
5512
5513 if (received == expect && cmd == NULL) {
bde89a9e 5514 if (decode_header(connection, connection->meta.rbuf, &pi))
b411b363 5515 goto reconnect;
7201b972 5516 cmd = &asender_tbl[pi.cmd];
1952e916 5517 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
1ec861eb 5518 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
2fcb8f30 5519 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5520 goto disconnect;
5521 }
e658983a 5522 expect = header_size + cmd->pkt_size;
52b061a4 5523 if (pi.size != expect - header_size) {
1ec861eb 5524 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5525 pi.cmd, pi.size);
b411b363 5526 goto reconnect;
257d0af6 5527 }
b411b363
PR
5528 }
5529 if (received == expect) {
2735a594 5530 bool err;
a4fbda8e 5531
bde89a9e 5532 err = cmd->fn(connection, &pi);
2735a594 5533 if (err) {
1ec861eb 5534 drbd_err(connection, "%pf failed\n", cmd->fn);
b411b363 5535 goto reconnect;
1952e916 5536 }
b411b363 5537
bde89a9e 5538 connection->last_received = jiffies;
f36af18c 5539
44ed167d
PR
5540 if (cmd == &asender_tbl[P_PING_ACK]) {
5541 /* restore idle timeout */
bde89a9e 5542 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
44ed167d
PR
5543 ping_timeout_active = false;
5544 }
f36af18c 5545
bde89a9e 5546 buf = connection->meta.rbuf;
b411b363 5547 received = 0;
52b061a4 5548 expect = header_size;
b411b363
PR
5549 cmd = NULL;
5550 }
5551 }
5552
5553 if (0) {
5554reconnect:
bde89a9e
AG
5555 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5556 conn_md_sync(connection);
b411b363
PR
5557 }
5558 if (0) {
5559disconnect:
bde89a9e 5560 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5561 }
bde89a9e 5562 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363 5563
1ec861eb 5564 drbd_info(connection, "asender terminated\n");
b411b363
PR
5565
5566 return 0;
5567}
This page took 1.252049 seconds and 5 git commands to generate.