drbd: fix bogus resync stats in /proc/drbd
drivers/block/drbd/drbd_receiver.c
/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

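/*
 * Usage sketch for the helpers above (illustrative only, not called
 * anywhere; error handling omitted).  A chain is grown by making each
 * page's ->private point at the previous head, and torn down with
 * page_chain_for_each_safe(), just like page_chain_free() does:
 *
 *	struct page *head = NULL, *page, *tmp;
 *
 *	page = alloc_page(GFP_TRY);
 *	set_page_private(page, 0);		// single-page chain
 *	page_chain_add(&head, page, page);	// link it in front of head
 *
 *	page = page_chain_del(&head, 1);	// take one page back off
 *	page_chain_for_each_safe(page, tmp)
 *		put_page(page);
 */
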
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

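/*
 * Caller-side sketch (illustrative only): every chain handed out here
 * is accounted in pp_in_use and must eventually be returned via
 * drbd_free_pages() below, which drops the accounting and wakes up
 * waiters that are throttled in the loop above:
 *
 *	struct page *chain = drbd_alloc_pages(peer_device, nr_pages, true);
 *	if (chain) {
 *		// ... receive payload into the pages, submit IO ...
 *		drbd_free_pages(peer_device->device, chain, 0);
 *	}
 */
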
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

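/*
 * Lock protocol sketch (see also the list further up): the underscore
 * variant expects resource->req_lock to be held and drops/retakes it
 * around io_schedule(); the plain variant takes the lock itself:
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	_drbd_wait_ee_list_empty(device, &device->active_ee);
 *	spin_unlock_irq(&device->resource->req_lock);
 */
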
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

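/*
 * Call-order sketch (illustrative only), as used by drbd_try_connect()
 * and prepare_listen_socket() below; per the tcp(7) quote above, the
 * buffer sizes must be in place before the connect/listen call:
 *
 *	sock_create_kern(..., &sock);
 *	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 *	sock->ops->connect(sock, ...);	// or ->bind() + ->listen()
 */
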
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

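/*
 * Probe sketch: the non-blocking peek above tells a dead socket from a
 * merely idle one without consuming any data from the stream; this is
 * what the connect loop in conn_connect() uses between its attempts:
 *
 *	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
 *	// rr > 0 or -EAGAIN: still alive; anything else: release it
 */
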
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

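/*
 * Shape of the handshake above, sketched (both nodes run the same loop,
 * so the P_INITIAL_* packets may cross on the wire):
 *
 *	do {
 *		s = drbd_try_connect(...);	// active side: first socket
 *		//   becomes "sock" (P_INITIAL_DATA), the second "msock"
 *		//   (P_INITIAL_META)
 *		s = drbd_wait_for_connect(...);	// passive side: sorted into
 *		//   sock/msock by receive_first_packet()
 *	} while (not both sockets established and okay);
 *	drbd_do_features(...);	// then drbd_do_auth() if cram_hmac_tfm
 */
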
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

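/*
 * The three wire header formats told apart above, sketched from the
 * field accesses (authoritative definitions live in drbd_protocol.h;
 * all fields are big endian):
 *
 *	p_header80:  __be32 magic = DRBD_MAGIC,     __be16 command, __be16 length
 *	p_header95:  __be16 magic = DRBD_MAGIC_BIG, __be16 command, __be32 length
 *	p_header100: __be32 magic = DRBD_MAGIC_100, __be16 volume,  __be16 command,
 *		     __be32 length, plus a pad field that must be zero
 *
 * Only the protocol 100 header carries a volume number; for the two
 * older formats pi->vnr is forced to 0.
 */
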
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
						GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}

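/*
 * Event flow feeding this function, sketched: each received write bumps
 * the epoch's ->epoch_size and ->active counters; completions and
 * barriers then funnel through drbd_may_finish_epoch():
 *
 *	drbd_may_finish_epoch(connection, epoch, EV_PUT);		// write done
 *	drbd_may_finish_epoch(connection, epoch, EV_GOT_BARRIER_NR);	// P_BARRIER
 *	drbd_may_finish_epoch(connection, epoch, EV_BECAME_LAST);	// predecessor gone
 *
 * Once epoch_size != 0, active == 0 and the barrier number is known
 * (or EV_CLEANUP forces it), the P_BARRIER_ACK is sent and the epoch
 * is freed (FE_DESTROYED) or reset for reuse (FE_RECYCLED).
 */
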
static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;

	return wo;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource:	DRBD resource.
 * @bdev:	additional backing device to take into account, if any.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

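/*
 * Resulting fallback behaviour, sketched: the method can only ever be
 * downgraded (WO_bdev_flush -> WO_drain_io -> WO_none), either because
 * a flush failed (see drbd_flush() above) or because disk_flushes /
 * disk_drain is disabled on one of the attached backing devices:
 *
 *	// e.g. after a failed blkdev_issue_flush():
 *	drbd_bump_write_ordering(resource, NULL, WO_drain_io);
 *	// resource->write_ordering is now at most WO_drain_io
 */
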
/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 * @fault_type:	fault injection class, passed on to drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
	return idr_find(&connection->peer_devices, volume_number);
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

1540/* used from receive_RSDataReply (recv_resync_read)
1541 * and from receive_Data */
f6ffca9f 1542static struct drbd_peer_request *
69a22773 1543read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
a0fb3c47 1544 struct packet_info *pi) __must_hold(local)
b411b363 1545{
69a22773 1546 struct drbd_device *device = peer_device->device;
b30ab791 1547 const sector_t capacity = drbd_get_capacity(device->this_bdev);
db830c46 1548 struct drbd_peer_request *peer_req;
b411b363 1549 struct page *page;
a5c31904 1550 int dgs, ds, err;
a0fb3c47 1551 int data_size = pi->size;
69a22773
AG
1552 void *dig_in = peer_device->connection->int_dig_in;
1553 void *dig_vv = peer_device->connection->int_dig_vv;
6b4388ac 1554 unsigned long *data;
a0fb3c47 1555 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
b411b363 1556
88104ca4 1557 dgs = 0;
a0fb3c47 1558 if (!trim && peer_device->connection->peer_integrity_tfm) {
69a22773 1559 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
9f5bdc33
AG
1560 /*
1561 * FIXME: Receive the incoming digest into the receive buffer
1562 * here, together with its struct p_data?
1563 */
69a22773 1564 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
a5c31904 1565 if (err)
b411b363 1566 return NULL;
88104ca4 1567 data_size -= dgs;
b411b363
PR
1568 }
1569
a0fb3c47
LE
1570 if (trim) {
1571 D_ASSERT(peer_device, data_size == 0);
1572 data_size = be32_to_cpu(trim->size);
1573 }
1574
841ce241
AG
1575 if (!expect(IS_ALIGNED(data_size, 512)))
1576 return NULL;
a0fb3c47
LE
1577 /* prepare for larger trim requests. */
1578 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
841ce241 1579 return NULL;
b411b363 1580
6666032a
LE
1581 /* even though we trust out peer,
1582 * we sometimes have to double check. */
1583 if (sector + (data_size>>9) > capacity) {
d0180171 1584 drbd_err(device, "request from peer beyond end of local disk: "
fdda6544 1585 "capacity: %llus < sector: %llus + size: %u\n",
6666032a
LE
1586 (unsigned long long)capacity,
1587 (unsigned long long)sector, data_size);
1588 return NULL;
1589 }
1590
b411b363
PR
1591 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1592 * "criss-cross" setup, that might cause write-out on some other DRBD,
1593 * which in turn might block on the other node at this very place. */
a0fb3c47 1594 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
db830c46 1595 if (!peer_req)
b411b363 1596 return NULL;
45bb912b 1597
a0fb3c47 1598 if (trim)
81a3537a 1599 return peer_req;
a73ff323 1600
b411b363 1601 ds = data_size;
db830c46 1602 page = peer_req->pages;
45bb912b
LE
1603 page_chain_for_each(page) {
1604 unsigned len = min_t(int, ds, PAGE_SIZE);
6b4388ac 1605 data = kmap(page);
69a22773 1606 err = drbd_recv_all_warn(peer_device->connection, data, len);
b30ab791 1607 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
d0180171 1608 drbd_err(device, "Fault injection: Corrupting data on receive\n");
6b4388ac
PR
1609 data[0] = data[0] ^ (unsigned long)-1;
1610 }
b411b363 1611 kunmap(page);
a5c31904 1612 if (err) {
b30ab791 1613 drbd_free_peer_req(device, peer_req);
b411b363
PR
1614 return NULL;
1615 }
a5c31904 1616 ds -= len;
b411b363
PR
1617 }
1618
1619 if (dgs) {
69a22773 1620 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
b411b363 1621 if (memcmp(dig_in, dig_vv, dgs)) {
d0180171 1622 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
470be44a 1623 (unsigned long long)sector, data_size);
b30ab791 1624 drbd_free_peer_req(device, peer_req);
b411b363
PR
1625 return NULL;
1626 }
1627 }
b30ab791 1628 device->recv_cnt += data_size>>9;
db830c46 1629 return peer_req;
b411b363
PR
1630}
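
/* Illustrative note (not from the original code): with a crc32c
 * peer_integrity_tfm, crypto_hash_digestsize() returns 4, so a P_DATA
 * packet whose pi->size is 4100 carries a 4-byte digest followed by
 * 4096 bytes of payload; "data_size -= dgs" above leaves just the payload. */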

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}

static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}

static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}

static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}

/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}

static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}

static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}

static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}

static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 * a <<= 8; b <<= 8;
	 */
	return (s32)a - (s32)b > 0;
}
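
/* Worked example (illustrative, not from the original code): for
 * a = 0x00000003 and b = 0xfffffffe, (s32)a - (s32)b == 3 - (-2) == 5 > 0,
 * so a is considered "greater" even though it is numerically smaller;
 * the sequence counter has simply wrapped around. */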

static u32 seq_max(u32 a, u32 b)
{
	return seq_greater(a, b) ? a : b;
}

static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
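
/* Illustrative example (not from the original code): with device->peer_seq
 * at 10, a packet carrying peer_seq 12 raises the maximum to 12 and wakes
 * waiters; a late packet carrying peer_seq 9 leaves the maximum at 10, so
 * nothing changed and no wake-up is issued. */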

static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
}
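
/* Illustrative example (not from the original code): lengths are in bytes,
 * sectors are 512 bytes. overlaps(0, 4096, 7, 512) is true, since sectors
 * 0..7 intersect sector 7; overlaps(0, 4096, 8, 512) is false, because the
 * first interval ends exactly where the second one begins. */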

/* maybe change sync_ee into interval trees as well? */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}

/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
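
/* Illustrative example (not from the original code): with device->peer_seq
 * at 7, a packet carrying peer_seq 8 satisfies !seq_greater(8 - 1, 7) and is
 * processed immediately, bumping peer_seq to 8; a packet carrying peer_seq 10
 * implies sequence numbers 8 and 9 are still in flight on the other socket,
 * so we wait for them (or a timeout) first. */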

/* see also bio_flags_to_wire()
 * DRBD_REQ_*, because we need to semantically map the flags to data packet
 * flags and back. We may replicate to other kernel versions. */
static unsigned long wire_flags_to_bio(u32 dpf)
{
	return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
		(dpf & DP_FUA ? REQ_FUA : 0) |
		(dpf & DP_FLUSH ? REQ_FLUSH : 0) |
		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
}
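
/* Illustrative example (not from the original code): a peer that sent
 * DP_RW_SYNC | DP_FUA in p_data->dp_flags has its write resubmitted locally
 * as REQ_SYNC | REQ_FUA, so the write ordering semantics survive the trip
 * over the wire, even between different kernel versions. */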

static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}

static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				(i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						      e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
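
/* Illustrative example (not from the original code): a local write covering
 * sectors 0..15 (i->sector == 0, i->size == 8192) fully contains a peer
 * write of sectors 4..7 (sector == 4, size == 2048), so the peer request is
 * superseded; had the peer write extended to sector 16, it would instead be
 * retried once the local request completes. */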

/* mirrored write */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(dp_flags);
	if (pi->cmd == P_TRIM) {
		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
		peer_req->flags |= EE_IS_TRIM;
		if (!blk_queue_discard(q))
			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, rw & REQ_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
	} else if (peer_req->pages == NULL) {
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
	rcu_read_unlock();
	if (tp) {
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* if we use the zeroout fallback code, we process synchronously
	 * and we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
		list_add(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (peer_device->connection->agreed_pro_version < 100) {
		rcu_read_lock();
		switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
		rcu_read_unlock();
	}

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
	}

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
	}

	err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(device, &peer_req->i);

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}

/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal a "significant"
 * amount (more than 64 sectors) of activity we cannot account for with our
 * own resync activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
	struct lc_element *tmp;
	bool throttle = true;

	if (!drbd_rs_c_min_rate_throttle(device))
		return false;

	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags))
			throttle = false;
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&device->al_lock);

	return throttle;
}
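
/* Illustrative note (not from the original code): BME_PRIORITY marks a
 * resync extent that application IO is already waiting on; resync requests
 * for such an extent are exempt from throttling, since slowing them down
 * would also stall the waiting application request. */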

bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	unsigned int c_min_rate;
	int curr_events;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return false;

	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
		      atomic_read(&device->rs_sect_ev);
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		if (dbdt > c_min_rate)
			return true;
	}
	return false;
}
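
/* Worked example (illustrative, not from the original code): if the bitmap
 * shrank by db = 3000 bits over dt = 6 seconds, and each bitmap bit covers
 * a 4 KiB block, then dbdt = Bit2KB(3000/6) = 500 * 4 = 2000 KiB/s; with
 * c_min_rate = 250 the resync is above the configured floor, so it may be
 * throttled while the backing device looks busy. */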

static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain payload, if any */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}

/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
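
/* Illustrative note (not from the original code): rv == 1 means our data
 * wins and we become sync source, rv == -1 means the peer wins, rv == -100
 * means no automatic decision. E.g. under ASB_DISCARD_YOUNGER_PRI, if our
 * bitmap UUID carries the primary flag (self == 1) while the peer's does
 * not (peer == 0), the peer is the younger primary and loses: rv = 1. */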
2725
69a22773
AG
2726/**
2727 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2728 */
2729static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2730{
69a22773 2731 struct drbd_device *device = peer_device->device;
6184ea21 2732 int hg, rv = -100;
44ed167d 2733 enum drbd_after_sb_p after_sb_1p;
b411b363 2734
44ed167d 2735 rcu_read_lock();
69a22773 2736 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
44ed167d
PR
2737 rcu_read_unlock();
2738 switch (after_sb_1p) {
b411b363
PR
2739 case ASB_DISCARD_YOUNGER_PRI:
2740 case ASB_DISCARD_OLDER_PRI:
2741 case ASB_DISCARD_LEAST_CHG:
2742 case ASB_DISCARD_LOCAL:
2743 case ASB_DISCARD_REMOTE:
44ed167d 2744 case ASB_DISCARD_ZERO_CHG:
d0180171 2745 drbd_err(device, "Configuration error.\n");
b411b363
PR
2746 break;
2747 case ASB_DISCONNECT:
2748 break;
2749 case ASB_CONSENSUS:
69a22773 2750 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2751 if (hg == -1 && device->state.role == R_SECONDARY)
b411b363 2752 rv = hg;
b30ab791 2753 if (hg == 1 && device->state.role == R_PRIMARY)
b411b363
PR
2754 rv = hg;
2755 break;
2756 case ASB_VIOLENTLY:
69a22773 2757 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2758 break;
2759 case ASB_DISCARD_SECONDARY:
b30ab791 2760 return device->state.role == R_PRIMARY ? 1 : -1;
b411b363 2761 case ASB_CALL_HELPER:
69a22773 2762 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2763 if (hg == -1 && device->state.role == R_PRIMARY) {
bb437946
AG
2764 enum drbd_state_rv rv2;
2765
b411b363
PR
2766 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2767 * we might be here in C_WF_REPORT_PARAMS which is transient.
2768 * we do not need to wait for the after state change work either. */
b30ab791 2769 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2770 if (rv2 != SS_SUCCESS) {
b30ab791 2771 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2772 } else {
d0180171 2773 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2774 rv = hg;
2775 }
2776 } else
2777 rv = hg;
2778 }
2779
2780 return rv;
2781}
2782
69a22773
AG
2783/**
2784 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2785 */
2786static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2787{
69a22773 2788 struct drbd_device *device = peer_device->device;
6184ea21 2789 int hg, rv = -100;
44ed167d 2790 enum drbd_after_sb_p after_sb_2p;
b411b363 2791
44ed167d 2792 rcu_read_lock();
69a22773 2793 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
44ed167d
PR
2794 rcu_read_unlock();
2795 switch (after_sb_2p) {
b411b363
PR
2796 case ASB_DISCARD_YOUNGER_PRI:
2797 case ASB_DISCARD_OLDER_PRI:
2798 case ASB_DISCARD_LEAST_CHG:
2799 case ASB_DISCARD_LOCAL:
2800 case ASB_DISCARD_REMOTE:
2801 case ASB_CONSENSUS:
2802 case ASB_DISCARD_SECONDARY:
44ed167d 2803 case ASB_DISCARD_ZERO_CHG:
d0180171 2804 drbd_err(device, "Configuration error.\n");
b411b363
PR
2805 break;
2806 case ASB_VIOLENTLY:
69a22773 2807 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2808 break;
2809 case ASB_DISCONNECT:
2810 break;
2811 case ASB_CALL_HELPER:
69a22773 2812 hg = drbd_asb_recover_0p(peer_device);
b411b363 2813 if (hg == -1) {
bb437946
AG
2814 enum drbd_state_rv rv2;
2815
b411b363
PR
2816 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2817 * we might be here in C_WF_REPORT_PARAMS which is transient.
2818 * we do not need to wait for the after state change work either. */
b30ab791 2819 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2820 if (rv2 != SS_SUCCESS) {
b30ab791 2821 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2822 } else {
d0180171 2823 drbd_warn(device, "Successfully gave up primary role.\n");
b411b363
PR
2824 rv = hg;
2825 }
2826 } else
2827 rv = hg;
2828 }
2829
2830 return rv;
2831}
2832
b30ab791 2833static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
b411b363
PR
2834 u64 bits, u64 flags)
2835{
2836 if (!uuid) {
d0180171 2837 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
b411b363
PR
2838 return;
2839 }
d0180171 2840 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
b411b363
PR
2841 text,
2842 (unsigned long long)uuid[UI_CURRENT],
2843 (unsigned long long)uuid[UI_BITMAP],
2844 (unsigned long long)uuid[UI_HISTORY_START],
2845 (unsigned long long)uuid[UI_HISTORY_END],
2846 (unsigned long long)bits,
2847 (unsigned long long)flags);
2848}
2849
2850/*
2851 100 after split brain try auto recover
2852 2 C_SYNC_SOURCE set BitMap
2853 1 C_SYNC_SOURCE use BitMap
2854 0 no Sync
2855 -1 C_SYNC_TARGET use BitMap
2856 -2 C_SYNC_TARGET set BitMap
2857 -100 after split brain, disconnect
2858-1000 unrelated data
4a23f264
PR
2859-1091 requires proto 91
2860-1096 requires proto 96
b411b363 2861 */
44a4d551 2862static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
b411b363 2863{
44a4d551
LE
2864 struct drbd_peer_device *const peer_device = first_peer_device(device);
2865 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
b411b363
PR
2866 u64 self, peer;
2867 int i, j;
2868
b30ab791
AG
2869 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2870 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2871
2872 *rule_nr = 10;
2873 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2874 return 0;
2875
2876 *rule_nr = 20;
2877 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2878 peer != UUID_JUST_CREATED)
2879 return -2;
2880
2881 *rule_nr = 30;
2882 if (self != UUID_JUST_CREATED &&
2883 (peer == UUID_JUST_CREATED || peer == (u64)0))
2884 return 2;
2885
2886 if (self == peer) {
2887 int rct, dc; /* roles at crash time */
2888
b30ab791 2889 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
b411b363 2890
44a4d551 2891 if (connection->agreed_pro_version < 91)
4a23f264 2892 return -1091;
b411b363 2893
b30ab791
AG
2894 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2895 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
d0180171 2896 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
b30ab791
AG
2897 drbd_uuid_move_history(device);
2898 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2899 device->ldev->md.uuid[UI_BITMAP] = 0;
b411b363 2900
b30ab791
AG
2901 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2902 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
2903 *rule_nr = 34;
2904 } else {
d0180171 2905 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
b411b363
PR
2906 *rule_nr = 36;
2907 }
2908
2909 return 1;
2910 }
2911
b30ab791 2912 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
b411b363 2913
44a4d551 2914 if (connection->agreed_pro_version < 91)
4a23f264 2915 return -1091;
b411b363 2916
b30ab791
AG
2917 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2918 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
d0180171 2919 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
b411b363 2920
b30ab791
AG
2921 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2922 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2923 device->p_uuid[UI_BITMAP] = 0UL;
b411b363 2924
b30ab791 2925 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
b411b363
PR
2926 *rule_nr = 35;
2927 } else {
d0180171 2928 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
b411b363
PR
2929 *rule_nr = 37;
2930 }
2931
2932 return -1;
2933 }
2934
2935 /* Common power [off|failure] */
b30ab791
AG
2936 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2937 (device->p_uuid[UI_FLAGS] & 2);
b411b363
PR
2938 /* lowest bit is set when we were primary,
2939 * next bit (weight 2) is set when peer was primary */
2940 *rule_nr = 40;
2941
2942 switch (rct) {
2943 case 0: /* !self_pri && !peer_pri */ return 0;
2944 case 1: /* self_pri && !peer_pri */ return 1;
2945 case 2: /* !self_pri && peer_pri */ return -1;
2946 case 3: /* self_pri && peer_pri */
44a4d551 2947 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
b411b363
PR
2948 return dc ? -1 : 1;
2949 }
2950 }
2951
2952 *rule_nr = 50;
b30ab791 2953 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
2954 if (self == peer)
2955 return -1;
2956
2957 *rule_nr = 51;
b30ab791 2958 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2959 if (self == peer) {
44a4d551 2960 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2961 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2962 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2963 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
2964 /* The last P_SYNC_UUID did not get though. Undo the last start of
2965 resync as sync source modifications of the peer's UUIDs. */
2966
44a4d551 2967 if (connection->agreed_pro_version < 91)
4a23f264 2968 return -1091;
b411b363 2969
b30ab791
AG
2970 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2971 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
4a23f264 2972
d0180171 2973 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
b30ab791 2974 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
4a23f264 2975
b411b363
PR
2976 return -1;
2977 }
2978 }
2979
2980 *rule_nr = 60;
b30ab791 2981 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
b411b363 2982 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 2983 peer = device->p_uuid[i] & ~((u64)1);
b411b363
PR
2984 if (self == peer)
2985 return -2;
2986 }
2987
2988 *rule_nr = 70;
b30ab791
AG
2989 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2990 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2991 if (self == peer)
2992 return 1;
2993
2994 *rule_nr = 71;
b30ab791 2995 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 2996 if (self == peer) {
44a4d551 2997 if (connection->agreed_pro_version < 96 ?
b30ab791
AG
2998 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2999 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3000 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
3001 /* The last P_SYNC_UUID did not get though. Undo the last start of
3002 resync as sync source modifications of our UUIDs. */
3003
44a4d551 3004 if (connection->agreed_pro_version < 91)
4a23f264 3005 return -1091;
b411b363 3006
b30ab791
AG
3007 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3008 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
b411b363 3009
d0180171 3010 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
b30ab791
AG
3011 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3012 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
b411b363
PR
3013
3014 return 1;
3015 }
3016 }
3017
3018
3019 *rule_nr = 80;
b30ab791 3020 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363 3021 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3022 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363
PR
3023 if (self == peer)
3024 return 2;
3025 }
3026
3027 *rule_nr = 90;
b30ab791
AG
3028 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3029 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
3030 if (self == peer && self != ((u64)0))
3031 return 100;
3032
3033 *rule_nr = 100;
3034 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3035 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363 3036 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
b30ab791 3037 peer = device->p_uuid[j] & ~((u64)1);
b411b363
PR
3038 if (self == peer)
3039 return -100;
3040 }
3041 }
3042
3043 return -1000;
3044}

/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
				  "automatically solved. Sync from %s node\n",
				  pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
					  " UUIDs were ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
				  "Sync from %s node\n",
				  (hg < 0) ? "peer" : "this");
	}
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data "
				  "assumption\n");
		}
	}

	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
				  drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				  abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
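
/* Outcome map of the handshake above (illustrative recap, not new logic):
 *   hg > 0  -> C_WF_BITMAP_S  (this node sends its bitmap, becomes source)
 *   hg < 0  -> C_WF_BITMAP_T  (this node waits for the bitmap, becomes target)
 *   hg == 0 -> C_CONNECTED    (no resync; stray bitmap bits are only logged)
 *   unresolved conflicts return C_MASK, and the caller drops the connection.
 */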

static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
{
	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
	if (peer == ASB_DISCARD_REMOTE)
		return ASB_DISCARD_LOCAL;

	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
	if (peer == ASB_DISCARD_LOCAL)
		return ASB_DISCARD_REMOTE;

	/* everything else is valid if they are equal on both sides. */
	return peer;
}
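
/* Worked example (illustrative): if the peer sends ASB_DISCARD_REMOTE,
 * convert_after_sb() returns ASB_DISCARD_LOCAL, so a local setting of
 * "discard-local" matches the peer's "discard-remote" - the pair names
 * the same victim from both points of view.  Symmetric settings such as
 * ASB_DISCONNECT must simply be equal on both sides. */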

static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok. */
static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		drbd_err(device, "Cannot allocate \"%s\" as %s (reason: %ld)\n",
			 alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}
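
/* Minimal caller sketch (illustrative only; "example_pick_verify_tfm" is a
 * hypothetical name, not part of the original driver) showing the three-way
 * return contract of the helper above. */
static struct crypto_hash * __maybe_unused
example_pick_verify_tfm(const struct drbd_device *device, const char *alg)
{
	struct crypto_hash *tfm = drbd_crypto_alloc_digest_safe(device, alg, "verify-alg");

	if (IS_ERR(tfm))	/* allocation failed; an error has been logged */
		return NULL;
	return tfm;		/* NULL here simply means alg was "" */
}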

static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}

/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	return ignore_remaining_packet(connection, pi);
}

static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					 "peer wants %u, accepting only up to %u bytes\n",
					 data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed\n");
					put_ldev(device);
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
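
/* Worked example for the fifo sizing above (illustrative; assumes SLEEP_TIME
 * is HZ/10 as defined in drbd_int.h): with c_plan_ahead = 20 (i.e. 2.0
 * seconds), fifo_size = 20 * 10 * (HZ/10) / HZ = 20 entries - one entry per
 * SLEEP_TIME tick of the resync rate controller. */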

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t d;
	if (a == 0 || b == 0)
		return;
	d = (a > b) ? (a - b) : (b - a);
	if (d > (a>>3) || d > (b>>3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
		     (unsigned long long)a, (unsigned long long)b);
}
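
/* Worked example (illustrative): a = 1000 and b = 1126 sectors gives
 * d = 126; since a>>3 = 125 and d > 125, the sizes differ by more than
 * 1/8 (12.5%) of the smaller value and the warning is emitted. */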

static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
		    drbd_get_capacity(device->this_bdev) &&
		    device->state.disk >= D_OUTDATED &&
		    device->state.conn < C_CONNECTED) {
			drbd_err(device, "The peer's disk size is too small!\n");
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)p_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_max_bio_size(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_max_bio_size(device, device->ldev);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_reconsider_max_bio_size(device, NULL);
		drbd_set_my_capacity(device, p_size);
	}

	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
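
/* Sizing example (illustrative numbers): on first connect
 * (C_WF_REPORT_PARAMS), my_usize = 0 (no local user limit) and a peer
 * u_size of 8 GiB yields min_not_zero(0, 8 GiB) = 8 GiB; if both sides
 * configured a limit, the smaller one wins, so the device can never be
 * sized past either node's request. */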

static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_uuids *p = pi->data;
	u64 *p_uuid;
	int i, updated_uuids = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		drbd_err(device, "kmalloc of p_uuid failed\n");
		return -ENOMEM;	/* do not pretend success on allocation failure */
	}

	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
		p_uuid[i] = be64_to_cpu(p->uuid[i]);

	kfree(device->p_uuid);
	device->p_uuid = p_uuid;

	if (device->state.conn < C_CONNECTED &&
	    device->state.disk < D_INCONSISTENT &&
	    device->state.role == R_PRIMARY &&
	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
		    (unsigned long long)device->ed_uuid);
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (get_ldev(device)) {
		int skip_initial_sync =
			device->state.conn == C_CONNECTED &&
			peer_device->connection->agreed_pro_version >= 90 &&
			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
			(p_uuid[UI_FLAGS] & 8);
		if (skip_initial_sync) {
			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
					"clear_n_write from receive_uuids",
					BM_LOCKED_TEST_ALLOWED);
			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
			_drbd_uuid_set(device, UI_BITMAP, 0);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			drbd_md_sync(device);
			updated_uuids = 1;
		}
		put_ldev(device);
	} else if (device->state.disk < D_INCONSISTENT &&
		   device->state.role == R_PRIMARY) {
		/* I am a diskless primary, the peer just created a new current UUID
		   for me. */
		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
	}

	/* Before we test for the disk state, we should wait until a possibly
	   ongoing cluster-wide state change has finished.  That is important if
	   we are primary and are detaching from our disk.  We need to see the
	   new disk state... */
	mutex_lock(device->state_mutex);
	mutex_unlock(device->state_mutex);
	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);

	if (updated_uuids)
		drbd_print_uuids(device, "receiver updated UUIDs to");

	return 0;
}

/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps: The state as seen by the peer.
 */
static union drbd_state convert_state(union drbd_state ps)
{
	union drbd_state ms;

	static enum drbd_conns c_tab[] = {
		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
		[C_CONNECTED] = C_CONNECTED,

		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
		[C_VERIFY_S] = C_VERIFY_T,
		[C_MASK] = C_MASK,
	};

	ms.i = ps.i;

	ms.conn = c_tab[ps.conn];
	ms.peer = ps.role;
	ms.role = ps.peer;
	ms.pdsk = ps.disk;
	ms.disk = ps.pdsk;
	ms.peer_isp = (ps.aftr_isp | ps.user_isp);

	return ms;
}
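
/* Worked example (illustrative): a peer reporting
 *   { role = R_PRIMARY, peer = R_SECONDARY, disk = D_UP_TO_DATE,
 *     pdsk = D_INCONSISTENT, conn = C_STARTING_SYNC_S }
 * converts to our point of view as
 *   { role = R_SECONDARY, peer = R_PRIMARY, disk = D_INCONSISTENT,
 *     pdsk = D_UP_TO_DATE, conn = C_STARTING_SYNC_T }
 * - role/peer and disk/pdsk swap places, and asymmetric connection
 * states are mirrored through c_tab[]. */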

static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	drbd_md_sync(device);

	return 0;
}

static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}

static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says its disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporary network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, cannot thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
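
/* Minimal sketch (illustrative only; "example_state_retry" is a hypothetical
 * name, not part of the original driver) of the lock/retry pattern used by
 * receive_state() above: sample the state, drop the lock for work that may
 * sleep, then re-check under the lock and retry if the state changed. */
static void __maybe_unused example_state_retry(struct drbd_device *device)
{
	union drbd_state os;

	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* ... compute a new state based on "os", possibly sleeping ... */

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;	/* somebody changed the state underneath us */
	spin_unlock_irq(&device->resource->req_lock);
}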

static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

/**
 * receive_bitmap_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
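
/* Throughput example for the plain transfer above (illustrative; assumes
 * DRBD_SOCKET_BUFFER_SIZE = 4096 and an 8 byte packet header): data_size
 * is then 4088, so up to 511 64-bit words - 32704 bits of bitmap - fit in
 * one P_BITMAP packet; the last packet simply carries the remainder. */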

static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}
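
/* Layout of p_compressed_bm->encoding, as implied by the helpers above:
 *   bit 7    : "start" flag - the value of the first (toggle) run
 *   bits 6-4 : number of pad bits at the end of the bit stream
 *   bits 3-0 : enum drbd_bitmap_code (RLE_VLI_Bits, ...)
 */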

/**
 * recv_bm_rle_bits
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		 struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl - 1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
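
/* Decoding example (illustrative): with dcbp_get_start(p) == 0 and the VLI
 * stream encoding the run lengths 5, 3, 7, the loop above leaves bits 0-4
 * untouched, sets bits 5-7 via _drbd_bm_set_bits(), and leaves bits 8-14
 * untouched - runs strictly alternate, so only every other run length
 * results in set bits. */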

/**
 * decode_bitmap_c
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests. */

	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total cannot be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
	     "total %u; compression: %u.%u%%\n",
			direction,
			c->bytes[1], c->packets[1],
			c->bytes[0], c->packets[0],
			total, r/10, r % 10);
}
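
/* Arithmetic example (illustrative): total = 1234 compressed bytes for a
 * bitmap whose plain transfer would take plain = 123456 bytes gives
 * r = 1000 * 1234 / 123456 = 9, so 1000 - 9 = 991 is printed as a
 * compression saving of 99.1%. */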

/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	for (;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)\n", pi->cmd);
			err = -EIO;
			goto out;
		}

		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		 pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}

struct data_cmd {
	int expect_payload;
	size_t pkt_size;
	int (*fn)(struct drbd_connection *, struct packet_info *);
};

static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply },
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier },
	[P_BITMAP]	    = { 1, 0, receive_bitmap },
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap },
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
4512
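/*
 * Aside: a minimal sketch of the table-driven dispatch that drbdd()
 * below performs (illustrative only, kept under #if 0; dispatch_one is
 * a hypothetical name).  pkt_size bytes of fixed sub-header are read
 * before the handler runs, and expect_payload guards against packets
 * that carry more data than that fixed part:
 */
#if 0
static int dispatch_one(struct drbd_connection *connection, struct packet_info *pi)
{
	struct data_cmd *cmd;

	if (pi->cmd >= ARRAY_SIZE(drbd_cmd_handler))
		return -EPROTO;
	cmd = &drbd_cmd_handler[pi->cmd];
	if (!cmd->fn)
		return -EPROTO;
	if (pi->size > cmd->pkt_size && !cmd->expect_payload)
		return -EPROTO;
	return cmd->fn(connection, pi);
}
#endif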
bde89a9e 4513static void drbdd(struct drbd_connection *connection)
b411b363 4514{
77351055 4515 struct packet_info pi;
02918be2 4516 size_t shs; /* sub header size */
82bc0194 4517 int err;
b411b363 4518
bde89a9e 4519 while (get_t_state(&connection->receiver) == RUNNING) {
deebe195 4520 struct data_cmd *cmd;
b411b363 4521
bde89a9e
AG
4522 drbd_thread_current_set_cpu(&connection->receiver);
4523 if (drbd_recv_header(connection, &pi))
02918be2 4524 goto err_out;
b411b363 4525
deebe195 4526 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4527 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
1ec861eb 4528 drbd_err(connection, "Unexpected data packet %s (0x%04x)\n",
2fcb8f30 4529 cmdname(pi.cmd), pi.cmd);
02918be2 4530 goto err_out;
0b33a916 4531 }
b411b363 4532
e658983a
AG
4533 shs = cmd->pkt_size;
4534 if (pi.size > shs && !cmd->expect_payload) {
1ec861eb 4535 drbd_err(connection, "No payload expected %s l:%d\n",
2fcb8f30 4536 cmdname(pi.cmd), pi.size);
02918be2 4537 goto err_out;
b411b363 4538 }
b411b363 4539
c13f7e1a 4540 if (shs) {
bde89a9e 4541 err = drbd_recv_all_warn(connection, pi.data, shs);
a5c31904 4542 if (err)
c13f7e1a 4543 goto err_out;
e2857216 4544 pi.size -= shs;
c13f7e1a
LE
4545 }
4546
bde89a9e 4547 err = cmd->fn(connection, &pi);
4a76b161 4548 if (err) {
1ec861eb 4549 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
9f5bdc33 4550 cmdname(pi.cmd), err, pi.size);
02918be2 4551 goto err_out;
b411b363
PR
4552 }
4553 }
82bc0194 4554 return;
b411b363 4555
82bc0194 4556 err_out:
bde89a9e 4557 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4558}
4559
bde89a9e 4560static void conn_disconnect(struct drbd_connection *connection)
b411b363 4561{
c06ece6b 4562 struct drbd_peer_device *peer_device;
bbeb641c 4563 enum drbd_conns oc;
376694a0 4564 int vnr;
b411b363 4565
bde89a9e 4566 if (connection->cstate == C_STANDALONE)
b411b363 4567 return;
b411b363 4568
545752d5
LE
4569 /* We are about to start the cleanup after connection loss.
4570 * Make sure drbd_make_request knows about that.
4571 * Usually we should be in some network failure state already,
4572 * but just in case we are not, we fix it up here.
4573 */
bde89a9e 4574 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
545752d5 4575
b411b363 4576 /* The asender does not clean up anything. It must not interfere, either. */
bde89a9e
AG
4577 drbd_thread_stop(&connection->asender);
4578 drbd_free_sock(connection);
360cc740 4579
c141ebda 4580 rcu_read_lock();
c06ece6b
AG
4581 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4582 struct drbd_device *device = peer_device->device;
b30ab791 4583 kref_get(&device->kref);
c141ebda 4584 rcu_read_unlock();
69a22773 4585 drbd_disconnected(peer_device);
c06ece6b 4586 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
4587 rcu_read_lock();
4588 }
4589 rcu_read_unlock();
4590
bde89a9e 4591 if (!list_empty(&connection->current_epoch->list))
1ec861eb 4592 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
12038a3a 4593 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
bde89a9e
AG
4594 atomic_set(&connection->current_epoch->epoch_size, 0);
4595 connection->send.seen_any_write_yet = false;
12038a3a 4596
1ec861eb 4597 drbd_info(connection, "Connection closed\n");
360cc740 4598
bde89a9e
AG
4599 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4600 conn_try_outdate_peer_async(connection);
cb703454 4601
0500813f 4602 spin_lock_irq(&connection->resource->req_lock);
bde89a9e 4603 oc = connection->cstate;
bbeb641c 4604 if (oc >= C_UNCONNECTED)
bde89a9e 4605 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4606
0500813f 4607 spin_unlock_irq(&connection->resource->req_lock);
360cc740 4608
f3dfa40a 4609 if (oc == C_DISCONNECTING)
bde89a9e 4610 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4611}
4612
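/*
 * Aside: the idr walk above follows a common RCU idiom, sketched here
 * with a hypothetical helper (kept under #if 0).  The per-device kref
 * is taken while still inside the RCU read section, the section is
 * left so the callback may sleep, and it is re-entered before the
 * iteration continues:
 */
#if 0
static void for_each_device_may_sleep(struct drbd_connection *connection,
				      void (*cb)(struct drbd_peer_device *))
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		kref_get(&peer_device->device->kref);
		rcu_read_unlock();
		cb(peer_device);	/* may sleep */
		kref_put(&peer_device->device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
#endif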
69a22773 4613static int drbd_disconnected(struct drbd_peer_device *peer_device)
360cc740 4614{
69a22773 4615 struct drbd_device *device = peer_device->device;
360cc740 4616 unsigned int i;
b411b363 4617
85719573 4618 /* wait for current activity to cease. */
0500813f 4619 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
4620 _drbd_wait_ee_list_empty(device, &device->active_ee);
4621 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4622 _drbd_wait_ee_list_empty(device, &device->read_ee);
0500813f 4623 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4624
4625 /* We do not have data structures that would allow us to
4626 * get the rs_pending_cnt down to 0 again.
4627 * * On C_SYNC_TARGET we do not have any data structures describing
4628 * the pending RSDataRequest's we have sent.
4629 * * On C_SYNC_SOURCE there is no data structure that tracks
4630 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4631 * And no, it is not the sum of the reference counts in the
4632 * resync_LRU. The resync_LRU tracks the whole operation including
4633 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4634 * on the fly. */
b30ab791
AG
4635 drbd_rs_cancel_all(device);
4636 device->rs_total = 0;
4637 device->rs_failed = 0;
4638 atomic_set(&device->rs_pending_cnt, 0);
4639 wake_up(&device->misc_wait);
b411b363 4640
b30ab791
AG
4641 del_timer_sync(&device->resync_timer);
4642 resync_timer_fn((unsigned long)device);
b411b363 4643
b411b363
PR
4644 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4645 * w_make_resync_request etc. which may still be on the worker queue
4646 * to be "canceled" */
b5043c5e 4647 drbd_flush_workqueue(&peer_device->connection->sender_work);
b411b363 4648
b30ab791 4649 drbd_finish_peer_reqs(device);
b411b363 4650
d10b4ea3
PR
4651 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4652 might have queued work again. The one before drbd_finish_peer_reqs() is
4653 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
b5043c5e 4654 drbd_flush_workqueue(&peer_device->connection->sender_work);
d10b4ea3 4655
08332d73
LE
4656 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4657 * again via drbd_try_clear_on_disk_bm(). */
b30ab791 4658 drbd_rs_cancel_all(device);
b411b363 4659
b30ab791
AG
4660 kfree(device->p_uuid);
4661 device->p_uuid = NULL;
b411b363 4662
b30ab791 4663 if (!drbd_suspended(device))
69a22773 4664 tl_clear(peer_device->connection);
b411b363 4665
b30ab791 4666 drbd_md_sync(device);
b411b363 4667
20ceb2b2
LE
4668 /* serialize with bitmap writeout triggered by the state change,
4669 * if any. */
b30ab791 4670 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
20ceb2b2 4671
b411b363
PR
4672 /* tcp_close and release of sendpage pages can be deferred. I don't
4673 * want to use SO_LINGER, because apparently it can be deferred for
4674 * more than 20 seconds (longest time I checked).
4675 *
4676 * Actually we don't care for exactly when the network stack does its
4677 * put_page(), but release our reference on these pages right here.
4678 */
b30ab791 4679 i = drbd_free_peer_reqs(device, &device->net_ee);
b411b363 4680 if (i)
d0180171 4681 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
b30ab791 4682 i = atomic_read(&device->pp_in_use_by_net);
435f0740 4683 if (i)
d0180171 4684 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
b30ab791 4685 i = atomic_read(&device->pp_in_use);
b411b363 4686 if (i)
d0180171 4687 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
b411b363 4688
0b0ba1ef
AG
4689 D_ASSERT(device, list_empty(&device->read_ee));
4690 D_ASSERT(device, list_empty(&device->active_ee));
4691 D_ASSERT(device, list_empty(&device->sync_ee));
4692 D_ASSERT(device, list_empty(&device->done_ee));
b411b363 4693
360cc740 4694 return 0;
b411b363
PR
4695}
4696
4697/*
4698 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4699 * we can agree on is stored in agreed_pro_version.
4700 *
4701 * feature flags and the reserved array should be enough room for future
4702 * enhancements of the handshake protocol, and possible plugins...
4703 *
4704 * for now, they are expected to be zero, but are ignored in any case.
4705 */
bde89a9e 4706static int drbd_send_features(struct drbd_connection *connection)
b411b363 4707{
9f5bdc33
AG
4708 struct drbd_socket *sock;
4709 struct p_connection_features *p;
b411b363 4710
bde89a9e
AG
4711 sock = &connection->data;
4712 p = conn_prepare_command(connection, sock);
9f5bdc33 4713 if (!p)
e8d17b01 4714 return -EIO;
b411b363
PR
4715 memset(p, 0, sizeof(*p));
4716 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4717 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
20c68fde 4718 p->feature_flags = cpu_to_be32(PRO_FEATURES);
bde89a9e 4719 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4720}
4721
4722/*
4723 * return values:
4724 * 1 yes, we have a valid connection
4725 * 0 oops, did not work out, please try again
4726 * -1 peer talks different language,
4727 * no point in trying again, please go standalone.
4728 */
bde89a9e 4729static int drbd_do_features(struct drbd_connection *connection)
b411b363 4730{
bde89a9e 4731 /* ASSERT current == connection->receiver ... */
e658983a
AG
4732 struct p_connection_features *p;
4733 const int expect = sizeof(struct p_connection_features);
77351055 4734 struct packet_info pi;
a5c31904 4735 int err;
b411b363 4736
bde89a9e 4737 err = drbd_send_features(connection);
e8d17b01 4738 if (err)
b411b363
PR
4739 return 0;
4740
bde89a9e 4741 err = drbd_recv_header(connection, &pi);
69bc7bc3 4742 if (err)
b411b363
PR
4743 return 0;
4744
6038178e 4745 if (pi.cmd != P_CONNECTION_FEATURES) {
1ec861eb 4746 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4747 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4748 return -1;
4749 }
4750
77351055 4751 if (pi.size != expect) {
1ec861eb 4752 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4753 expect, pi.size);
b411b363
PR
4754 return -1;
4755 }
4756
e658983a 4757 p = pi.data;
bde89a9e 4758 err = drbd_recv_all_warn(connection, p, expect);
a5c31904 4759 if (err)
b411b363 4760 return 0;
b411b363 4761
b411b363
PR
4762 p->protocol_min = be32_to_cpu(p->protocol_min);
4763 p->protocol_max = be32_to_cpu(p->protocol_max);
4764 if (p->protocol_max == 0)
4765 p->protocol_max = p->protocol_min;
4766
4767 if (PRO_VERSION_MAX < p->protocol_min ||
4768 PRO_VERSION_MIN > p->protocol_max)
4769 goto incompat;
4770
bde89a9e 4771 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
20c68fde 4772 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
b411b363 4773
1ec861eb 4774 drbd_info(connection, "Handshake successful: "
bde89a9e 4775 "Agreed network protocol version %d\n", connection->agreed_pro_version);
b411b363 4776
20c68fde
LE
4777 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4778 connection->agreed_features & FF_TRIM ? " " : " not ");
4779
b411b363
PR
4780 return 1;
4781
4782 incompat:
1ec861eb 4783 drbd_err(connection, "incompatible DRBD dialects: "
b411b363
PR
4784 "I support %d-%d, peer supports %d-%d\n",
4785 PRO_VERSION_MIN, PRO_VERSION_MAX,
4786 p->protocol_min, p->protocol_max);
4787 return -1;
4788}
4789
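/*
 * Worked example of the agreement rule above (a sketch, assuming a
 * local PRO_VERSION_MIN..PRO_VERSION_MAX range of e.g. 86..101): a
 * peer reporting 86..96 yields min(101, 96) == 96 as the agreed
 * version, while a peer reporting 102..110 takes the incompat path.
 */
#if 0
static int agree_version(int peer_min, int peer_max)
{
	if (PRO_VERSION_MAX < peer_min || PRO_VERSION_MIN > peer_max)
		return -1;	/* no common dialect, go standalone */
	return min_t(int, PRO_VERSION_MAX, peer_max);
}
#endif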
4790#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
bde89a9e 4791static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4792{
1ec861eb
AG
4793 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4794 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4795 return -1;
b411b363
PR
4796}
4797#else
4798#define CHALLENGE_LEN 64
b10d96cb
JT
4799
4800/* Return value:
4801 1 - auth succeeded,
4802 0 - failed, try again (network error),
4803 -1 - auth failed, don't try again.
4804*/
4805
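/*
 * Shape of the exchange implemented below.  Both peers run the same
 * function, so the flow is symmetric ("A:" is this node, "B:" the peer;
 * HMAC() is keyed with net_conf->shared_secret):
 *
 *   A -> B  P_AUTH_CHALLENGE  my_challenge (CHALLENGE_LEN random bytes)
 *   B -> A  P_AUTH_CHALLENGE  the peer's own random challenge
 *   A -> B  P_AUTH_RESPONSE   HMAC(peer's challenge)
 *   B -> A  P_AUTH_RESPONSE   HMAC(my_challenge)
 *
 * Each side recomputes the HMAC over the challenge it generated and
 * compares it with the response the peer sent; any mismatch fails
 * authentication with -1.
 */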
bde89a9e 4806static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4807{
9f5bdc33 4808 struct drbd_socket *sock;
b411b363
PR
4809 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4810 struct scatterlist sg;
4811 char *response = NULL;
4812 char *right_response = NULL;
4813 char *peers_ch = NULL;
44ed167d
PR
4814 unsigned int key_len;
4815 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4816 unsigned int resp_size;
4817 struct hash_desc desc;
77351055 4818 struct packet_info pi;
44ed167d 4819 struct net_conf *nc;
69bc7bc3 4820 int err, rv;
b411b363 4821
9f5bdc33 4822 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
b411b363 4823
44ed167d 4824 rcu_read_lock();
bde89a9e 4825 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
4826 key_len = strlen(nc->shared_secret);
4827 memcpy(secret, nc->shared_secret, key_len);
4828 rcu_read_unlock();
4829
bde89a9e 4830 desc.tfm = connection->cram_hmac_tfm;
b411b363
PR
4831 desc.flags = 0;
4832
bde89a9e 4833 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4834 if (rv) {
1ec861eb 4835 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4836 rv = -1;
b411b363
PR
4837 goto fail;
4838 }
4839
4840 get_random_bytes(my_challenge, CHALLENGE_LEN);
4841
bde89a9e
AG
4842 sock = &connection->data;
4843 if (!conn_prepare_command(connection, sock)) {
9f5bdc33
AG
4844 rv = 0;
4845 goto fail;
4846 }
bde89a9e 4847 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4848 my_challenge, CHALLENGE_LEN);
b411b363
PR
4849 if (!rv)
4850 goto fail;
4851
bde89a9e 4852 err = drbd_recv_header(connection, &pi);
69bc7bc3
AG
4853 if (err) {
4854 rv = 0;
b411b363 4855 goto fail;
69bc7bc3 4856 }
b411b363 4857
77351055 4858 if (pi.cmd != P_AUTH_CHALLENGE) {
1ec861eb 4859 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4860 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4861 rv = 0;
4862 goto fail;
4863 }
4864
77351055 4865 if (pi.size > CHALLENGE_LEN * 2) {
1ec861eb 4866 drbd_err(connection, "AuthChallenge payload too big.\n");
b10d96cb 4867 rv = -1;
b411b363
PR
4868 goto fail;
4869 }
4870
67cca286
PR
4871 if (pi.size < CHALLENGE_LEN) {
4872 drbd_err(connection, "AuthChallenge payload too small.\n");
4873 rv = -1;
4874 goto fail;
4875 }
4876
77351055 4877 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4878 if (peers_ch == NULL) {
1ec861eb 4879 drbd_err(connection, "kmalloc of peers_ch failed\n");
b10d96cb 4880 rv = -1;
b411b363
PR
4881 goto fail;
4882 }
4883
bde89a9e 4884 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
a5c31904 4885 if (err) {
b411b363
PR
4886 rv = 0;
4887 goto fail;
4888 }
4889
67cca286
PR
4890 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4891 drbd_err(connection, "Peer presented the same challenge!\n");
4892 rv = -1;
4893 goto fail;
4894 }
4895
bde89a9e 4896 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
b411b363
PR
4897 response = kmalloc(resp_size, GFP_NOIO);
4898 if (response == NULL) {
1ec861eb 4899 drbd_err(connection, "kmalloc of response failed\n");
b10d96cb 4900 rv = -1;
b411b363
PR
4901 goto fail;
4902 }
4903
4904 sg_init_table(&sg, 1);
77351055 4905 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4906
4907 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4908 if (rv) {
1ec861eb 4909 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4910 rv = -1;
b411b363
PR
4911 goto fail;
4912 }
4913
bde89a9e 4914 if (!conn_prepare_command(connection, sock)) {
9f5bdc33 4915 rv = 0;
b411b363 4916 goto fail;
9f5bdc33 4917 }
bde89a9e 4918 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4919 response, resp_size);
b411b363
PR
4920 if (!rv)
4921 goto fail;
4922
bde89a9e 4923 err = drbd_recv_header(connection, &pi);
69bc7bc3 4924 if (err) {
b411b363
PR
4925 rv = 0;
4926 goto fail;
4927 }
4928
77351055 4929 if (pi.cmd != P_AUTH_RESPONSE) {
1ec861eb 4930 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4931 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4932 rv = 0;
4933 goto fail;
4934 }
4935
77351055 4936 if (pi.size != resp_size) {
1ec861eb 4937 drbd_err(connection, "AuthResponse payload has wrong size\n");
b411b363
PR
4938 rv = 0;
4939 goto fail;
4940 }
b411b363 4941
bde89a9e 4942 err = drbd_recv_all_warn(connection, response, resp_size);
a5c31904 4943 if (err) {
b411b363
PR
4944 rv = 0;
4945 goto fail;
4946 }
4947
4948 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4949 if (right_response == NULL) {
1ec861eb 4950 drbd_err(connection, "kmalloc of right_response failed\n");
b10d96cb 4951 rv = -1;
b411b363
PR
4952 goto fail;
4953 }
4954
4955 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4956
4957 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4958 if (rv) {
1ec861eb 4959 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4960 rv = -1;
b411b363
PR
4961 goto fail;
4962 }
4963
4964 rv = !memcmp(response, right_response, resp_size);
4965
4966 if (rv)
1ec861eb 4967 drbd_info(connection, "Peer authenticated using %d bytes of HMAC\n",
44ed167d 4968 resp_size);
b10d96cb
JT
4969 else
4970 rv = -1;
b411b363
PR
4971
4972 fail:
4973 kfree(peers_ch);
4974 kfree(response);
4975 kfree(right_response);
4976
4977 return rv;
4978}
4979#endif
4980
8fe60551 4981int drbd_receiver(struct drbd_thread *thi)
b411b363 4982{
bde89a9e 4983 struct drbd_connection *connection = thi->connection;
b411b363
PR
4984 int h;
4985
1ec861eb 4986 drbd_info(connection, "receiver (re)started\n");
b411b363
PR
4987
4988 do {
bde89a9e 4989 h = conn_connect(connection);
b411b363 4990 if (h == 0) {
bde89a9e 4991 conn_disconnect(connection);
20ee6390 4992 schedule_timeout_interruptible(HZ);
b411b363
PR
4993 }
4994 if (h == -1) {
1ec861eb 4995 drbd_warn(connection, "Discarding network configuration.\n");
bde89a9e 4996 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4997 }
4998 } while (h == 0);
4999
91fd4dad 5000 if (h > 0)
bde89a9e 5001 drbdd(connection);
b411b363 5002
bde89a9e 5003 conn_disconnect(connection);
b411b363 5004
1ec861eb 5005 drbd_info(connection, "receiver terminated\n");
b411b363
PR
5006 return 0;
5007}
5008
5009/* ********* acknowledge sender ******** */
5010
bde89a9e 5011static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5012{
e658983a 5013 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
5014 int retcode = be32_to_cpu(p->retcode);
5015
5016 if (retcode >= SS_SUCCESS) {
bde89a9e 5017 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
e4f78ede 5018 } else {
bde89a9e 5019 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
1ec861eb 5020 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
e4f78ede
PR
5021 drbd_set_st_err_str(retcode), retcode);
5022 }
bde89a9e 5023 wake_up(&connection->ping_wait);
e4f78ede 5024
2735a594 5025 return 0;
e4f78ede 5026}
b411b363 5027
bde89a9e 5028static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5029{
9f4fe9ad 5030 struct drbd_peer_device *peer_device;
b30ab791 5031 struct drbd_device *device;
e658983a 5032 struct p_req_state_reply *p = pi->data;
b411b363
PR
5033 int retcode = be32_to_cpu(p->retcode);
5034
9f4fe9ad
AG
5035 peer_device = conn_peer_device(connection, pi->vnr);
5036 if (!peer_device)
2735a594 5037 return -EIO;
9f4fe9ad 5038 device = peer_device->device;
1952e916 5039
bde89a9e 5040 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
0b0ba1ef 5041 D_ASSERT(device, connection->agreed_pro_version < 100);
bde89a9e 5042 return got_conn_RqSReply(connection, pi);
4d0fc3fd
PR
5043 }
5044
b411b363 5045 if (retcode >= SS_SUCCESS) {
b30ab791 5046 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
b411b363 5047 } else {
b30ab791 5048 set_bit(CL_ST_CHG_FAIL, &device->flags);
d0180171 5049 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
e4f78ede 5050 drbd_set_st_err_str(retcode), retcode);
b411b363 5051 }
b30ab791 5052 wake_up(&device->state_wait);
b411b363 5053
2735a594 5054 return 0;
b411b363
PR
5055}
5056
bde89a9e 5057static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5058{
bde89a9e 5059 return drbd_send_ping_ack(connection);
b411b363
PR
5060
5061}
5062
bde89a9e 5063static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363
PR
5064{
5065 /* restore idle timeout */
bde89a9e
AG
5066 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5067 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5068 wake_up(&connection->ping_wait);
b411b363 5069
2735a594 5070 return 0;
b411b363
PR
5071}
5072
bde89a9e 5073static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5074{
9f4fe9ad 5075 struct drbd_peer_device *peer_device;
b30ab791 5076 struct drbd_device *device;
e658983a 5077 struct p_block_ack *p = pi->data;
b411b363
PR
5078 sector_t sector = be64_to_cpu(p->sector);
5079 int blksize = be32_to_cpu(p->blksize);
5080
9f4fe9ad
AG
5081 peer_device = conn_peer_device(connection, pi->vnr);
5082 if (!peer_device)
2735a594 5083 return -EIO;
9f4fe9ad 5084 device = peer_device->device;
1952e916 5085
9f4fe9ad 5086 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
b411b363 5087
69a22773 5088 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5089
b30ab791
AG
5090 if (get_ldev(device)) {
5091 drbd_rs_complete_io(device, sector);
5092 drbd_set_in_sync(device, sector, blksize);
1d53f09e 5093 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
b30ab791
AG
5094 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5095 put_ldev(device);
1d53f09e 5096 }
b30ab791
AG
5097 dec_rs_pending(device);
5098 atomic_add(blksize >> 9, &device->rs_sect_in);
b411b363 5099
2735a594 5100 return 0;
b411b363
PR
5101}
5102
bc9c5c41 5103static int
b30ab791 5104validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
bc9c5c41
AG
5105 struct rb_root *root, const char *func,
5106 enum drbd_req_event what, bool missing_ok)
b411b363
PR
5107{
5108 struct drbd_request *req;
5109 struct bio_and_error m;
5110
0500813f 5111 spin_lock_irq(&device->resource->req_lock);
b30ab791 5112 req = find_request(device, root, id, sector, missing_ok, func);
b411b363 5113 if (unlikely(!req)) {
0500813f 5114 spin_unlock_irq(&device->resource->req_lock);
85997675 5115 return -EIO;
b411b363
PR
5116 }
5117 __req_mod(req, what, &m);
0500813f 5118 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
5119
5120 if (m.bio)
b30ab791 5121 complete_master_bio(device, &m);
85997675 5122 return 0;
b411b363
PR
5123}
5124
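/*
 * Note on missing_ok: with protocol A there are no write acks, so a
 * late P_NEG_ACK may refer to a request that has already completed
 * and left the collision hash; got_NegAck() below therefore passes
 * missing_ok = true and turns a failed lookup into "mark the block
 * out of sync" instead of an error.
 */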
bde89a9e 5125static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5126{
9f4fe9ad 5127 struct drbd_peer_device *peer_device;
b30ab791 5128 struct drbd_device *device;
e658983a 5129 struct p_block_ack *p = pi->data;
b411b363
PR
5130 sector_t sector = be64_to_cpu(p->sector);
5131 int blksize = be32_to_cpu(p->blksize);
5132 enum drbd_req_event what;
5133
9f4fe9ad
AG
5134 peer_device = conn_peer_device(connection, pi->vnr);
5135 if (!peer_device)
2735a594 5136 return -EIO;
9f4fe9ad 5137 device = peer_device->device;
1952e916 5138
69a22773 5139 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5140
579b57ed 5141 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5142 drbd_set_in_sync(device, sector, blksize);
5143 dec_rs_pending(device);
2735a594 5144 return 0;
b411b363 5145 }
e05e1e59 5146 switch (pi->cmd) {
b411b363 5147 case P_RS_WRITE_ACK:
8554df1c 5148 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
5149 break;
5150 case P_WRITE_ACK:
8554df1c 5151 what = WRITE_ACKED_BY_PEER;
b411b363
PR
5152 break;
5153 case P_RECV_ACK:
8554df1c 5154 what = RECV_ACKED_BY_PEER;
b411b363 5155 break;
d4dabbe2
LE
5156 case P_SUPERSEDED:
5157 what = CONFLICT_RESOLVED;
b411b363 5158 break;
7be8da07 5159 case P_RETRY_WRITE:
7be8da07 5160 what = POSTPONE_WRITE;
b411b363
PR
5161 break;
5162 default:
2735a594 5163 BUG();
b411b363
PR
5164 }
5165
b30ab791
AG
5166 return validate_req_change_req_state(device, p->block_id, sector,
5167 &device->write_requests, __func__,
2735a594 5168 what, false);
b411b363
PR
5169}
5170
bde89a9e 5171static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5172{
9f4fe9ad 5173 struct drbd_peer_device *peer_device;
b30ab791 5174 struct drbd_device *device;
e658983a 5175 struct p_block_ack *p = pi->data;
b411b363 5176 sector_t sector = be64_to_cpu(p->sector);
2deb8336 5177 int size = be32_to_cpu(p->blksize);
85997675 5178 int err;
b411b363 5179
9f4fe9ad
AG
5180 peer_device = conn_peer_device(connection, pi->vnr);
5181 if (!peer_device)
2735a594 5182 return -EIO;
9f4fe9ad 5183 device = peer_device->device;
b411b363 5184
69a22773 5185 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5186
579b57ed 5187 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5188 dec_rs_pending(device);
5189 drbd_rs_failed_io(device, sector, size);
2735a594 5190 return 0;
b411b363 5191 }
2deb8336 5192
b30ab791
AG
5193 err = validate_req_change_req_state(device, p->block_id, sector,
5194 &device->write_requests, __func__,
303d1448 5195 NEG_ACKED, true);
85997675 5196 if (err) {
c3afd8f5
AG
5197 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5198 The master bio might already be completed, therefore the
5199 request is no longer in the collision hash. */
5200 /* In Protocol B we might already have got a P_RECV_ACK
5201 but then get a P_NEG_ACK afterwards. */
b30ab791 5202 drbd_set_out_of_sync(device, sector, size);
2deb8336 5203 }
2735a594 5204 return 0;
b411b363
PR
5205}
5206
bde89a9e 5207static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5208{
9f4fe9ad 5209 struct drbd_peer_device *peer_device;
b30ab791 5210 struct drbd_device *device;
e658983a 5211 struct p_block_ack *p = pi->data;
b411b363
PR
5212 sector_t sector = be64_to_cpu(p->sector);
5213
9f4fe9ad
AG
5214 peer_device = conn_peer_device(connection, pi->vnr);
5215 if (!peer_device)
2735a594 5216 return -EIO;
9f4fe9ad 5217 device = peer_device->device;
1952e916 5218
69a22773 5219 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
7be8da07 5220
d0180171 5221 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5222 (unsigned long long)sector, be32_to_cpu(p->blksize));
5223
b30ab791
AG
5224 return validate_req_change_req_state(device, p->block_id, sector,
5225 &device->read_requests, __func__,
2735a594 5226 NEG_ACKED, false);
b411b363
PR
5227}
5228
bde89a9e 5229static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5230{
9f4fe9ad 5231 struct drbd_peer_device *peer_device;
b30ab791 5232 struct drbd_device *device;
b411b363
PR
5233 sector_t sector;
5234 int size;
e658983a 5235 struct p_block_ack *p = pi->data;
1952e916 5236
9f4fe9ad
AG
5237 peer_device = conn_peer_device(connection, pi->vnr);
5238 if (!peer_device)
2735a594 5239 return -EIO;
9f4fe9ad 5240 device = peer_device->device;
b411b363
PR
5241
5242 sector = be64_to_cpu(p->sector);
5243 size = be32_to_cpu(p->blksize);
b411b363 5244
69a22773 5245 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5246
b30ab791 5247 dec_rs_pending(device);
b411b363 5248
b30ab791
AG
5249 if (get_ldev_if_state(device, D_FAILED)) {
5250 drbd_rs_complete_io(device, sector);
e05e1e59 5251 switch (pi->cmd) {
d612d309 5252 case P_NEG_RS_DREPLY:
b30ab791 5253 drbd_rs_failed_io(device, sector, size);
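		/* fall through - only the failed-io accounting above differs from P_RS_CANCEL */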
d612d309
PR
5254 case P_RS_CANCEL:
5255 break;
5256 default:
2735a594 5257 BUG();
d612d309 5258 }
b30ab791 5259 put_ldev(device);
b411b363
PR
5260 }
5261
2735a594 5262 return 0;
b411b363
PR
5263}
5264
bde89a9e 5265static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5266{
e658983a 5267 struct p_barrier_ack *p = pi->data;
c06ece6b 5268 struct drbd_peer_device *peer_device;
9ed57dcb 5269 int vnr;
1952e916 5270
bde89a9e 5271 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
b411b363 5272
9ed57dcb 5273 rcu_read_lock();
c06ece6b
AG
5274 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5275 struct drbd_device *device = peer_device->device;
5276
b30ab791
AG
5277 if (device->state.conn == C_AHEAD &&
5278 atomic_read(&device->ap_in_flight) == 0 &&
5279 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5280 device->start_resync_timer.expires = jiffies + HZ;
5281 add_timer(&device->start_resync_timer);
9ed57dcb 5282 }
c4752ef1 5283 }
9ed57dcb 5284 rcu_read_unlock();
c4752ef1 5285
2735a594 5286 return 0;
b411b363
PR
5287}
5288
bde89a9e 5289static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5290{
9f4fe9ad 5291 struct drbd_peer_device *peer_device;
b30ab791 5292 struct drbd_device *device;
e658983a 5293 struct p_block_ack *p = pi->data;
84b8c06b 5294 struct drbd_device_work *dw;
b411b363
PR
5295 sector_t sector;
5296 int size;
5297
9f4fe9ad
AG
5298 peer_device = conn_peer_device(connection, pi->vnr);
5299 if (!peer_device)
2735a594 5300 return -EIO;
9f4fe9ad 5301 device = peer_device->device;
1952e916 5302
b411b363
PR
5303 sector = be64_to_cpu(p->sector);
5304 size = be32_to_cpu(p->blksize);
5305
69a22773 5306 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363
PR
5307
5308 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
b30ab791 5309 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 5310 else
b30ab791 5311 ov_out_of_sync_print(device);
b411b363 5312
b30ab791 5313 if (!get_ldev(device))
2735a594 5314 return 0;
1d53f09e 5315
b30ab791
AG
5316 drbd_rs_complete_io(device, sector);
5317 dec_rs_pending(device);
b411b363 5318
b30ab791 5319 --device->ov_left;
ea5442af
LE
5320
5321 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
5322 if ((device->ov_left & 0x200) == 0x200)
5323 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 5324
b30ab791 5325 if (device->ov_left == 0) {
84b8c06b
AG
5326 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5327 if (dw) {
5328 dw->w.cb = w_ov_finished;
5329 dw->device = device;
5330 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
b411b363 5331 } else {
84b8c06b 5332 drbd_err(device, "kmalloc(dw) failed.\n");
b30ab791
AG
5333 ov_out_of_sync_print(device);
5334 drbd_resync_finished(device);
b411b363
PR
5335 }
5336 }
b30ab791 5337 put_ldev(device);
2735a594 5338 return 0;
b411b363
PR
5339}
5340
bde89a9e 5341static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 5342{
2735a594 5343 return 0;
b411b363
PR
5344}
5345
bde89a9e 5346static int connection_finish_peer_reqs(struct drbd_connection *connection)
0ced55a3 5347{
c06ece6b 5348 struct drbd_peer_device *peer_device;
c141ebda 5349 int vnr, not_empty = 0;
32862ec7
PR
5350
5351 do {
bde89a9e 5352 clear_bit(SIGNAL_ASENDER, &connection->flags);
32862ec7 5353 flush_signals(current);
c141ebda
PR
5354
5355 rcu_read_lock();
c06ece6b
AG
5356 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5357 struct drbd_device *device = peer_device->device;
b30ab791 5358 kref_get(&device->kref);
c141ebda 5359 rcu_read_unlock();
b30ab791 5360 if (drbd_finish_peer_reqs(device)) {
05a10ec7 5361 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5362 return 1;
d3fcb490 5363 }
05a10ec7 5364 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5365 rcu_read_lock();
082a3439 5366 }
bde89a9e 5367 set_bit(SIGNAL_ASENDER, &connection->flags);
082a3439 5368
0500813f 5369 spin_lock_irq(&connection->resource->req_lock);
c06ece6b
AG
5370 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5371 struct drbd_device *device = peer_device->device;
b30ab791 5372 not_empty = !list_empty(&device->done_ee);
082a3439
PR
5373 if (not_empty)
5374 break;
5375 }
0500813f 5376 spin_unlock_irq(&connection->resource->req_lock);
c141ebda 5377 rcu_read_unlock();
32862ec7
PR
5378 } while (not_empty);
5379
5380 return 0;
0ced55a3
PR
5381}
5382
b411b363
PR
5383struct asender_cmd {
5384 size_t pkt_size;
bde89a9e 5385 int (*fn)(struct drbd_connection *connection, struct packet_info *);
b411b363
PR
5386};
5387
7201b972 5388static struct asender_cmd asender_tbl[] = {
e658983a
AG
5389 [P_PING] = { 0, got_Ping },
5390 [P_PING_ACK] = { 0, got_PingAck },
b411b363
PR
5391 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5392 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5393 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
d4dabbe2 5394 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
b411b363
PR
5395 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5396 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
1952e916 5397 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
b411b363
PR
5398 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5399 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5400 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5401 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
02918be2 5402 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
1952e916
AG
5403 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5404 [P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
5405 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972 5406};
b411b363
PR
5407
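/*
 * Unlike the data-socket table above, meta packets carry no variable
 * payload: the receive loop below expects exactly header_size +
 * cmd->pkt_size bytes per packet and treats any size mismatch as a
 * reason to reconnect.
 */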
5408int drbd_asender(struct drbd_thread *thi)
5409{
bde89a9e 5410 struct drbd_connection *connection = thi->connection;
b411b363 5411 struct asender_cmd *cmd = NULL;
77351055 5412 struct packet_info pi;
257d0af6 5413 int rv;
bde89a9e 5414 void *buf = connection->meta.rbuf;
b411b363 5415 int received = 0;
bde89a9e 5416 unsigned int header_size = drbd_header_size(connection);
52b061a4 5417 int expect = header_size;
44ed167d
PR
5418 bool ping_timeout_active = false;
5419 struct net_conf *nc;
bb77d34e 5420 int ping_timeo, tcp_cork, ping_int;
3990e04d 5421 struct sched_param param = { .sched_priority = 2 };
b411b363 5422
3990e04d
PR
5423 rv = sched_setscheduler(current, SCHED_RR, &param);
5424 if (rv < 0)
1ec861eb 5425 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
b411b363 5426
e77a0a5c 5427 while (get_t_state(thi) == RUNNING) {
80822284 5428 drbd_thread_current_set_cpu(thi);
b411b363 5429
44ed167d 5430 rcu_read_lock();
bde89a9e 5431 nc = rcu_dereference(connection->net_conf);
44ed167d 5432 ping_timeo = nc->ping_timeo;
bb77d34e 5433 tcp_cork = nc->tcp_cork;
44ed167d
PR
5434 ping_int = nc->ping_int;
5435 rcu_read_unlock();
5436
bde89a9e
AG
5437 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5438 if (drbd_send_ping(connection)) {
1ec861eb 5439 drbd_err(connection, "drbd_send_ping has failed\n");
b411b363 5440 goto reconnect;
841ce241 5441 }
bde89a9e 5442 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
44ed167d 5443 ping_timeout_active = true;
b411b363
PR
5444 }
5445
32862ec7
PR
5446 /* TODO: conditionally cork; it may hurt latency if we cork without
5447 much to send */
bb77d34e 5448 if (tcp_cork)
bde89a9e
AG
5449 drbd_tcp_cork(connection->meta.socket);
5450 if (connection_finish_peer_reqs(connection)) {
1ec861eb 5451 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
32862ec7 5452 goto reconnect;
b411b363
PR
5453 }
5454 /* but unconditionally uncork unless disabled */
bb77d34e 5455 if (tcp_cork)
bde89a9e 5456 drbd_tcp_uncork(connection->meta.socket);
b411b363
PR
5457
5458 /* short circuit, recv_msg would return EINTR anyways. */
5459 if (signal_pending(current))
5460 continue;
5461
bde89a9e
AG
5462 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5463 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363
PR
5464
5465 flush_signals(current);
5466
5467 /* Note:
5468 * -EINTR (on meta) we got a signal
5469 * -EAGAIN (on meta) rcvtimeo expired
5470 * -ECONNRESET other side closed the connection
5471 * -ERESTARTSYS (on data) we got a signal
5472 * rv < 0 other than above: unexpected error!
5473 * rv == expected: full header or command
5474 * rv < expected: "woken" by signal during receive
5475 * rv == 0 : "connection shut down by peer"
5476 */
5477 if (likely(rv > 0)) {
5478 received += rv;
5479 buf += rv;
5480 } else if (rv == 0) {
bde89a9e 5481 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
5482 long t;
5483 rcu_read_lock();
bde89a9e 5484 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
5485 rcu_read_unlock();
5486
bde89a9e
AG
5487 t = wait_event_timeout(connection->ping_wait,
5488 connection->cstate < C_WF_REPORT_PARAMS,
b66623e3 5489 t);
599377ac
PR
5490 if (t)
5491 break;
5492 }
1ec861eb 5493 drbd_err(connection, "meta connection shut down by peer.\n");
b411b363
PR
5494 goto reconnect;
5495 } else if (rv == -EAGAIN) {
cb6518cb
LE
5496 /* If the data socket received something meanwhile,
5497 * that is good enough: peer is still alive. */
bde89a9e
AG
5498 if (time_after(connection->last_received,
5499 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5500 continue;
f36af18c 5501 if (ping_timeout_active) {
1ec861eb 5502 drbd_err(connection, "PingAck did not arrive in time.\n");
b411b363
PR
5503 goto reconnect;
5504 }
bde89a9e 5505 set_bit(SEND_PING, &connection->flags);
b411b363
PR
5506 continue;
5507 } else if (rv == -EINTR) {
5508 continue;
5509 } else {
1ec861eb 5510 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5511 goto reconnect;
5512 }
5513
5514 if (received == expect && cmd == NULL) {
bde89a9e 5515 if (decode_header(connection, connection->meta.rbuf, &pi))
b411b363 5516 goto reconnect;
7201b972 5517 cmd = &asender_tbl[pi.cmd];
1952e916 5518 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
1ec861eb 5519 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
2fcb8f30 5520 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5521 goto disconnect;
5522 }
e658983a 5523 expect = header_size + cmd->pkt_size;
52b061a4 5524 if (pi.size != expect - header_size) {
1ec861eb 5525 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5526 pi.cmd, pi.size);
b411b363 5527 goto reconnect;
257d0af6 5528 }
b411b363
PR
5529 }
5530 if (received == expect) {
2735a594 5531 bool err;
a4fbda8e 5532
bde89a9e 5533 err = cmd->fn(connection, &pi);
2735a594 5534 if (err) {
1ec861eb 5535 drbd_err(connection, "%pf failed\n", cmd->fn);
b411b363 5536 goto reconnect;
1952e916 5537 }
b411b363 5538
bde89a9e 5539 connection->last_received = jiffies;
f36af18c 5540
44ed167d
PR
5541 if (cmd == &asender_tbl[P_PING_ACK]) {
5542 /* restore idle timeout */
bde89a9e 5543 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
44ed167d
PR
5544 ping_timeout_active = false;
5545 }
f36af18c 5546
bde89a9e 5547 buf = connection->meta.rbuf;
b411b363 5548 received = 0;
52b061a4 5549 expect = header_size;
b411b363
PR
5550 cmd = NULL;
5551 }
5552 }
5553
5554 if (0) {
5555reconnect:
bde89a9e
AG
5556 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5557 conn_md_sync(connection);
b411b363
PR
5558 }
5559 if (0) {
5560disconnect:
bde89a9e 5561 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5562 }
bde89a9e 5563 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363 5564
1ec861eb 5565 drbd_info(connection, "asender terminated\n");
b411b363
PR
5566
5567 return 0;
5568}