drbd: get rid of drbd_queue_work_front
[deliverable/linux.git] / drivers / block / drbd / drbd_receiver.c
CommitLineData
b411b363
PR
1/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
b411b363
PR
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
b411b363
PR
31#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
b411b363
PR
39#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
b411b363
PR
44#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
a3603a6e 47#include "drbd_protocol.h"
b411b363 48#include "drbd_req.h"
b411b363
PR
49#include "drbd_vli.h"
50
20c68fde
LE
51#define PRO_FEATURES (FF_TRIM)
52
77351055
PR
53struct packet_info {
54 enum drbd_packet cmd;
e2857216
AG
55 unsigned int size;
56 unsigned int vnr;
e658983a 57 void *data;
77351055
PR
58};
59
b411b363
PR
60enum finish_epoch {
61 FE_STILL_LIVE,
62 FE_DESTROYED,
63 FE_RECYCLED,
64};
65
bde89a9e
AG
66static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
69a22773 68static int drbd_disconnected(struct drbd_peer_device *);
a0fb3c47 69static void conn_wait_active_ee_empty(struct drbd_connection *connection);
bde89a9e 70static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
99920dc5 71static int e_end_block(struct drbd_work *, int);
b411b363 72
b411b363
PR
73
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
45bb912b
LE
76/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
81/* If at least n pages are linked at head, get n pages off.
82 * Otherwise, don't modify head, and return NULL.
83 * Locking is the responsibility of the caller.
84 */
85static struct page *page_chain_del(struct page **head, int n)
86{
87 struct page *page;
88 struct page *tmp;
89
90 BUG_ON(!n);
91 BUG_ON(!head);
92
93 page = *head;
23ce4227
PR
94
95 if (!page)
96 return NULL;
97
45bb912b
LE
98 while (page) {
99 tmp = page_chain_next(page);
100 if (--n == 0)
101 break; /* found sufficient pages */
102 if (tmp == NULL)
103 /* insufficient pages, don't use any of them. */
104 return NULL;
105 page = tmp;
106 }
107
108 /* add end of list marker for the returned list */
109 set_page_private(page, 0);
110 /* actual return value, and adjustment of head */
111 page = *head;
112 *head = tmp;
113 return page;
114}
115
116/* may be used outside of locks to find the tail of a (usually short)
117 * "private" page chain, before adding it back to a global chain head
118 * with page_chain_add() under a spinlock. */
119static struct page *page_chain_tail(struct page *page, int *len)
120{
121 struct page *tmp;
122 int i = 1;
123 while ((tmp = page_chain_next(page)))
124 ++i, page = tmp;
125 if (len)
126 *len = i;
127 return page;
128}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
141static void page_chain_add(struct page **head,
142 struct page *chain_first, struct page *chain_last)
143{
144#if 1
145 struct page *tmp;
146 tmp = page_chain_tail(chain_first, NULL);
147 BUG_ON(tmp != chain_last);
148#endif
149
150 /* add chain to head */
151 set_page_private(chain_last, (unsigned long)*head);
152 *head = chain_first;
153}
154
b30ab791 155static struct page *__drbd_alloc_pages(struct drbd_device *device,
18c2d522 156 unsigned int number)
b411b363
PR
157{
158 struct page *page = NULL;
45bb912b 159 struct page *tmp = NULL;
18c2d522 160 unsigned int i = 0;
b411b363
PR
161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
45bb912b 164 if (drbd_pp_vacant >= number) {
b411b363 165 spin_lock(&drbd_pp_lock);
45bb912b
LE
166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
b411b363 169 spin_unlock(&drbd_pp_lock);
45bb912b
LE
170 if (page)
171 return page;
b411b363 172 }
45bb912b 173
b411b363
PR
174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
45bb912b
LE
177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
c37c8ecf 189 * No need to jump around here, drbd_alloc_pages will retry this
45bb912b
LE
190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
b411b363
PR
199}
200
b30ab791 201static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
a990be46 202 struct list_head *to_be_freed)
b411b363 203{
a8cd15ba 204 struct drbd_peer_request *peer_req, *tmp;
b411b363
PR
205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
a8cd15ba 211 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
045417f7 212 if (drbd_peer_req_has_active_page(peer_req))
b411b363 213 break;
a8cd15ba 214 list_move(&peer_req->w.list, to_be_freed);
b411b363
PR
215 }
216}
217
b30ab791 218static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
b411b363
PR
219{
220 LIST_HEAD(reclaimed);
db830c46 221 struct drbd_peer_request *peer_req, *t;
b411b363 222
0500813f 223 spin_lock_irq(&device->resource->req_lock);
b30ab791 224 reclaim_finished_net_peer_reqs(device, &reclaimed);
0500813f 225 spin_unlock_irq(&device->resource->req_lock);
b411b363 226
a8cd15ba 227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
b30ab791 228 drbd_free_net_peer_req(device, peer_req);
b411b363
PR
229}
230
231/**
c37c8ecf 232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
b30ab791 233 * @device: DRBD device.
45bb912b
LE
234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
236 *
237 * Tries to allocate number pages, first from our own page pool, then from
0e49d7b0 238 * the kernel.
45bb912b 239 * Possibly retry until DRBD frees sufficient pages somewhere else.
b411b363 240 *
0e49d7b0
LE
241 * If this allocation would exceed the max_buffers setting, we throttle
242 * allocation (schedule_timeout) to give the system some room to breathe.
243 *
244 * We do not use max-buffers as hard limit, because it could lead to
245 * congestion and further to a distributed deadlock during online-verify or
246 * (checksum based) resync, if the max-buffers, socket buffer sizes and
247 * resync-rate settings are mis-configured.
248 *
45bb912b 249 * Returns a page chain linked via page->private.
b411b363 250 */
69a22773 251struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
c37c8ecf 252 bool retry)
b411b363 253{
69a22773 254 struct drbd_device *device = peer_device->device;
b411b363 255 struct page *page = NULL;
44ed167d 256 struct net_conf *nc;
b411b363 257 DEFINE_WAIT(wait);
0e49d7b0 258 unsigned int mxb;
b411b363 259
44ed167d 260 rcu_read_lock();
69a22773 261 nc = rcu_dereference(peer_device->connection->net_conf);
44ed167d
PR
262 mxb = nc ? nc->max_buffers : 1000000;
263 rcu_read_unlock();
264
b30ab791
AG
265 if (atomic_read(&device->pp_in_use) < mxb)
266 page = __drbd_alloc_pages(device, number);
b411b363 267
45bb912b 268 while (page == NULL) {
b411b363
PR
269 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
270
b30ab791 271 drbd_kick_lo_and_reclaim_net(device);
b411b363 272
b30ab791
AG
273 if (atomic_read(&device->pp_in_use) < mxb) {
274 page = __drbd_alloc_pages(device, number);
b411b363
PR
275 if (page)
276 break;
277 }
278
279 if (!retry)
280 break;
281
282 if (signal_pending(current)) {
d0180171 283 drbd_warn(device, "drbd_alloc_pages interrupted!\n");
b411b363
PR
284 break;
285 }
286
0e49d7b0
LE
287 if (schedule_timeout(HZ/10) == 0)
288 mxb = UINT_MAX;
b411b363
PR
289 }
290 finish_wait(&drbd_pp_wait, &wait);
291
45bb912b 292 if (page)
b30ab791 293 atomic_add(number, &device->pp_in_use);
b411b363
PR
294 return page;
295}
296
c37c8ecf 297/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
0500813f 298 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
45bb912b
LE
299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */
b30ab791 301static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
b411b363 302{
b30ab791 303 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
b411b363 304 int i;
435f0740 305
a73ff323
LE
306 if (page == NULL)
307 return;
308
81a5d60e 309 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
45bb912b
LE
310 i = page_chain_free(page);
311 else {
312 struct page *tmp;
313 tmp = page_chain_tail(page, &i);
314 spin_lock(&drbd_pp_lock);
315 page_chain_add(&drbd_pp_pool, page, tmp);
316 drbd_pp_vacant += i;
317 spin_unlock(&drbd_pp_lock);
b411b363 318 }
435f0740 319 i = atomic_sub_return(i, a);
45bb912b 320 if (i < 0)
d0180171 321 drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
435f0740 322 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
b411b363
PR
323 wake_up(&drbd_pp_wait);
324}
325
326/*
327You need to hold the req_lock:
328 _drbd_wait_ee_list_empty()
329
330You must not have the req_lock:
3967deb1 331 drbd_free_peer_req()
0db55363 332 drbd_alloc_peer_req()
7721f567 333 drbd_free_peer_reqs()
b411b363 334 drbd_ee_fix_bhs()
a990be46 335 drbd_finish_peer_reqs()
b411b363
PR
336 drbd_clear_done_ee()
337 drbd_wait_ee_list_empty()
338*/
339
f6ffca9f 340struct drbd_peer_request *
69a22773 341drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
a0fb3c47 342 unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
b411b363 343{
69a22773 344 struct drbd_device *device = peer_device->device;
db830c46 345 struct drbd_peer_request *peer_req;
a73ff323 346 struct page *page = NULL;
45bb912b 347 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
b411b363 348
b30ab791 349 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
b411b363
PR
350 return NULL;
351
db830c46
AG
352 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
353 if (!peer_req) {
b411b363 354 if (!(gfp_mask & __GFP_NOWARN))
d0180171 355 drbd_err(device, "%s: allocation failed\n", __func__);
b411b363
PR
356 return NULL;
357 }
358
a0fb3c47 359 if (has_payload && data_size) {
69a22773 360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
a73ff323
LE
361 if (!page)
362 goto fail;
363 }
b411b363 364
db830c46
AG
365 drbd_clear_interval(&peer_req->i);
366 peer_req->i.size = data_size;
367 peer_req->i.sector = sector;
368 peer_req->i.local = false;
369 peer_req->i.waiting = false;
370
371 peer_req->epoch = NULL;
a8cd15ba 372 peer_req->peer_device = peer_device;
db830c46
AG
373 peer_req->pages = page;
374 atomic_set(&peer_req->pending_bios, 0);
375 peer_req->flags = 0;
9a8e7753
AG
376 /*
377 * The block_id is opaque to the receiver. It is not endianness
378 * converted, and sent back to the sender unchanged.
379 */
db830c46 380 peer_req->block_id = id;
b411b363 381
db830c46 382 return peer_req;
b411b363 383
45bb912b 384 fail:
db830c46 385 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
386 return NULL;
387}
388
b30ab791 389void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
f6ffca9f 390 int is_net)
b411b363 391{
db830c46
AG
392 if (peer_req->flags & EE_HAS_DIGEST)
393 kfree(peer_req->digest);
b30ab791 394 drbd_free_pages(device, peer_req->pages, is_net);
0b0ba1ef
AG
395 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
396 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
db830c46 397 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
398}
399
b30ab791 400int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
b411b363
PR
401{
402 LIST_HEAD(work_list);
db830c46 403 struct drbd_peer_request *peer_req, *t;
b411b363 404 int count = 0;
b30ab791 405 int is_net = list == &device->net_ee;
b411b363 406
0500813f 407 spin_lock_irq(&device->resource->req_lock);
b411b363 408 list_splice_init(list, &work_list);
0500813f 409 spin_unlock_irq(&device->resource->req_lock);
b411b363 410
a8cd15ba 411 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
b30ab791 412 __drbd_free_peer_req(device, peer_req, is_net);
b411b363
PR
413 count++;
414 }
415 return count;
416}
417
b411b363 418/*
a990be46 419 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
b411b363 420 */
b30ab791 421static int drbd_finish_peer_reqs(struct drbd_device *device)
b411b363
PR
422{
423 LIST_HEAD(work_list);
424 LIST_HEAD(reclaimed);
db830c46 425 struct drbd_peer_request *peer_req, *t;
e2b3032b 426 int err = 0;
b411b363 427
0500813f 428 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
429 reclaim_finished_net_peer_reqs(device, &reclaimed);
430 list_splice_init(&device->done_ee, &work_list);
0500813f 431 spin_unlock_irq(&device->resource->req_lock);
b411b363 432
a8cd15ba 433 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
b30ab791 434 drbd_free_net_peer_req(device, peer_req);
b411b363
PR
435
436 /* possible callbacks here:
d4dabbe2 437 * e_end_block, and e_end_resync_block, e_send_superseded.
b411b363
PR
438 * all ignore the last argument.
439 */
a8cd15ba 440 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
e2b3032b
AG
441 int err2;
442
b411b363 443 /* list_del not necessary, next/prev members not touched */
a8cd15ba 444 err2 = peer_req->w.cb(&peer_req->w, !!err);
e2b3032b
AG
445 if (!err)
446 err = err2;
b30ab791 447 drbd_free_peer_req(device, peer_req);
b411b363 448 }
b30ab791 449 wake_up(&device->ee_wait);
b411b363 450
e2b3032b 451 return err;
b411b363
PR
452}
453
b30ab791 454static void _drbd_wait_ee_list_empty(struct drbd_device *device,
d4da1537 455 struct list_head *head)
b411b363
PR
456{
457 DEFINE_WAIT(wait);
458
459 /* avoids spin_lock/unlock
460 * and calling prepare_to_wait in the fast path */
461 while (!list_empty(head)) {
b30ab791 462 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
0500813f 463 spin_unlock_irq(&device->resource->req_lock);
7eaceacc 464 io_schedule();
b30ab791 465 finish_wait(&device->ee_wait, &wait);
0500813f 466 spin_lock_irq(&device->resource->req_lock);
b411b363
PR
467 }
468}
469
b30ab791 470static void drbd_wait_ee_list_empty(struct drbd_device *device,
d4da1537 471 struct list_head *head)
b411b363 472{
0500813f 473 spin_lock_irq(&device->resource->req_lock);
b30ab791 474 _drbd_wait_ee_list_empty(device, head);
0500813f 475 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
476}
477
dbd9eea0 478static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
b411b363 479{
b411b363
PR
480 struct kvec iov = {
481 .iov_base = buf,
482 .iov_len = size,
483 };
484 struct msghdr msg = {
b411b363
PR
485 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
486 };
f730c848 487 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
b411b363
PR
488}
489
bde89a9e 490static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
b411b363 491{
b411b363
PR
492 int rv;
493
bde89a9e 494 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
b411b363 495
dbd0820c
PR
496 if (rv < 0) {
497 if (rv == -ECONNRESET)
1ec861eb 498 drbd_info(connection, "sock was reset by peer\n");
dbd0820c 499 else if (rv != -ERESTARTSYS)
1ec861eb 500 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
dbd0820c 501 } else if (rv == 0) {
bde89a9e 502 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
503 long t;
504 rcu_read_lock();
bde89a9e 505 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
506 rcu_read_unlock();
507
bde89a9e 508 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
b66623e3 509
599377ac
PR
510 if (t)
511 goto out;
512 }
1ec861eb 513 drbd_info(connection, "sock was shut down by peer\n");
599377ac
PR
514 }
515
b411b363 516 if (rv != size)
bde89a9e 517 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
b411b363 518
599377ac 519out:
b411b363
PR
520 return rv;
521}
522
bde89a9e 523static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
c6967746
AG
524{
525 int err;
526
bde89a9e 527 err = drbd_recv(connection, buf, size);
c6967746
AG
528 if (err != size) {
529 if (err >= 0)
530 err = -EIO;
531 } else
532 err = 0;
533 return err;
534}
535
bde89a9e 536static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
a5c31904
AG
537{
538 int err;
539
bde89a9e 540 err = drbd_recv_all(connection, buf, size);
a5c31904 541 if (err && !signal_pending(current))
1ec861eb 542 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
a5c31904
AG
543 return err;
544}
545
5dbf1673
LE
546/* quoting tcp(7):
547 * On individual connections, the socket buffer size must be set prior to the
548 * listen(2) or connect(2) calls in order to have it take effect.
549 * This is our wrapper to do so.
550 */
551static void drbd_setbufsize(struct socket *sock, unsigned int snd,
552 unsigned int rcv)
553{
554 /* open coded SO_SNDBUF, SO_RCVBUF */
555 if (snd) {
556 sock->sk->sk_sndbuf = snd;
557 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
558 }
559 if (rcv) {
560 sock->sk->sk_rcvbuf = rcv;
561 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
562 }
563}
564
/* Actively try to establish a TCP connection to the peer.
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, peer not yet reachable, signal) do not change the connection
 * state; anything else forces C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nconf;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables under RCU */
	rcu_read_lock();
	nconf = rcu_dereference(connection->net_conf);
	if (!nconf) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nconf->sndbuf_size;
	rcvbuf_size = nconf->rcvbuf_size;
	connect_int = nconf->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* port 0: let the kernel pick a free source port */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
652
7a426fd8 653struct accept_wait_data {
bde89a9e 654 struct drbd_connection *connection;
7a426fd8
PR
655 struct socket *s_listen;
656 struct completion door_bell;
657 void (*original_sk_state_change)(struct sock *sk);
658
659};
660
715306f6 661static void drbd_incoming_connection(struct sock *sk)
7a426fd8
PR
662{
663 struct accept_wait_data *ad = sk->sk_user_data;
715306f6 664 void (*state_change)(struct sock *sk);
7a426fd8 665
715306f6
AG
666 state_change = ad->original_sk_state_change;
667 if (sk->sk_state == TCP_ESTABLISHED)
668 complete(&ad->door_bell);
669 state_change(sk);
7a426fd8
PR
670}
671
bde89a9e 672static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
b411b363 673{
1f3e509b 674 int err, sndbuf_size, rcvbuf_size, my_addr_len;
44ed167d 675 struct sockaddr_in6 my_addr;
1f3e509b 676 struct socket *s_listen;
44ed167d 677 struct net_conf *nc;
b411b363
PR
678 const char *what;
679
44ed167d 680 rcu_read_lock();
bde89a9e 681 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
682 if (!nc) {
683 rcu_read_unlock();
7a426fd8 684 return -EIO;
44ed167d 685 }
44ed167d
PR
686 sndbuf_size = nc->sndbuf_size;
687 rcvbuf_size = nc->rcvbuf_size;
44ed167d 688 rcu_read_unlock();
b411b363 689
bde89a9e
AG
690 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
691 memcpy(&my_addr, &connection->my_addr, my_addr_len);
b411b363
PR
692
693 what = "sock_create_kern";
44ed167d 694 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
1f3e509b 695 SOCK_STREAM, IPPROTO_TCP, &s_listen);
b411b363
PR
696 if (err) {
697 s_listen = NULL;
698 goto out;
699 }
700
98683650 701 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
44ed167d 702 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
b411b363
PR
703
704 what = "bind before listen";
44ed167d 705 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
b411b363
PR
706 if (err < 0)
707 goto out;
708
7a426fd8
PR
709 ad->s_listen = s_listen;
710 write_lock_bh(&s_listen->sk->sk_callback_lock);
711 ad->original_sk_state_change = s_listen->sk->sk_state_change;
715306f6 712 s_listen->sk->sk_state_change = drbd_incoming_connection;
7a426fd8
PR
713 s_listen->sk->sk_user_data = ad;
714 write_unlock_bh(&s_listen->sk->sk_callback_lock);
b411b363 715
2820fd39
PR
716 what = "listen";
717 err = s_listen->ops->listen(s_listen, 5);
718 if (err < 0)
719 goto out;
720
7a426fd8 721 return 0;
b411b363
PR
722out:
723 if (s_listen)
724 sock_release(s_listen);
725 if (err < 0) {
726 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
1ec861eb 727 drbd_err(connection, "%s failed, err = %d\n", what, err);
bde89a9e 728 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
729 }
730 }
b411b363 731
7a426fd8 732 return -EIO;
b411b363
PR
733}
734
715306f6 735static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
b411b363 736{
715306f6
AG
737 write_lock_bh(&sk->sk_callback_lock);
738 sk->sk_state_change = ad->original_sk_state_change;
739 sk->sk_user_data = NULL;
740 write_unlock_bh(&sk->sk_callback_lock);
b411b363
PR
741}
742
bde89a9e 743static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
b411b363 744{
1f3e509b
PR
745 int timeo, connect_int, err = 0;
746 struct socket *s_estab = NULL;
1f3e509b
PR
747 struct net_conf *nc;
748
749 rcu_read_lock();
bde89a9e 750 nc = rcu_dereference(connection->net_conf);
1f3e509b
PR
751 if (!nc) {
752 rcu_read_unlock();
753 return NULL;
754 }
755 connect_int = nc->connect_int;
756 rcu_read_unlock();
757
758 timeo = connect_int * HZ;
38b682b2
AM
759 /* 28.5% random jitter */
760 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
1f3e509b 761
7a426fd8
PR
762 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
763 if (err <= 0)
764 return NULL;
b411b363 765
7a426fd8 766 err = kernel_accept(ad->s_listen, &s_estab, 0);
b411b363
PR
767 if (err < 0) {
768 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
1ec861eb 769 drbd_err(connection, "accept failed, err = %d\n", err);
bde89a9e 770 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
771 }
772 }
b411b363 773
715306f6
AG
774 if (s_estab)
775 unregister_state_change(s_estab->sk, ad);
b411b363 776
b411b363
PR
777 return s_estab;
778}
b411b363 779
bde89a9e 780static int decode_header(struct drbd_connection *, void *, struct packet_info *);
b411b363 781
bde89a9e 782static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
9f5bdc33
AG
783 enum drbd_packet cmd)
784{
bde89a9e 785 if (!conn_prepare_command(connection, sock))
9f5bdc33 786 return -EIO;
bde89a9e 787 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
b411b363
PR
788}
789
bde89a9e 790static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
b411b363 791{
bde89a9e 792 unsigned int header_size = drbd_header_size(connection);
9f5bdc33
AG
793 struct packet_info pi;
794 int err;
b411b363 795
bde89a9e 796 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
9f5bdc33
AG
797 if (err != header_size) {
798 if (err >= 0)
799 err = -EIO;
800 return err;
801 }
bde89a9e 802 err = decode_header(connection, connection->data.rbuf, &pi);
9f5bdc33
AG
803 if (err)
804 return err;
805 return pi.cmd;
b411b363
PR
806}
807
808/**
809 * drbd_socket_okay() - Free the socket if its connection is not okay
b411b363
PR
810 * @sock: pointer to the pointer to the socket.
811 */
dbd9eea0 812static int drbd_socket_okay(struct socket **sock)
b411b363
PR
813{
814 int rr;
815 char tb[4];
816
817 if (!*sock)
81e84650 818 return false;
b411b363 819
dbd9eea0 820 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
b411b363
PR
821
822 if (rr > 0 || rr == -EAGAIN) {
81e84650 823 return true;
b411b363
PR
824 } else {
825 sock_release(*sock);
826 *sock = NULL;
81e84650 827 return false;
b411b363
PR
828 }
829}
2325eb66
PR
830/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */
69a22773 832int drbd_connected(struct drbd_peer_device *peer_device)
907599e0 833{
69a22773 834 struct drbd_device *device = peer_device->device;
0829f5ed 835 int err;
907599e0 836
b30ab791
AG
837 atomic_set(&device->packet_seq, 0);
838 device->peer_seq = 0;
907599e0 839
69a22773
AG
840 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
841 &peer_device->connection->cstate_mutex :
b30ab791 842 &device->own_state_mutex;
8410da8f 843
69a22773 844 err = drbd_send_sync_param(peer_device);
0829f5ed 845 if (!err)
69a22773 846 err = drbd_send_sizes(peer_device, 0, 0);
0829f5ed 847 if (!err)
69a22773 848 err = drbd_send_uuids(peer_device);
0829f5ed 849 if (!err)
69a22773 850 err = drbd_send_current_state(peer_device);
b30ab791
AG
851 clear_bit(USE_DEGR_WFC_T, &device->flags);
852 clear_bit(RESIZE_PENDING, &device->flags);
853 atomic_set(&device->ap_in_flight, 0);
854 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
0829f5ed 855 return err;
907599e0 856}
b411b363
PR
857
858/*
859 * return values:
860 * 1 yes, we have a valid connection
861 * 0 oops, did not work out, please try again
862 * -1 peer talks different language,
863 * no point in trying again, please go standalone.
864 * -2 We do not have a network config...
865 */
bde89a9e 866static int conn_connect(struct drbd_connection *connection)
b411b363 867{
7da35862 868 struct drbd_socket sock, msock;
c06ece6b 869 struct drbd_peer_device *peer_device;
44ed167d 870 struct net_conf *nc;
92f14951 871 int vnr, timeout, h, ok;
08b165ba 872 bool discard_my_data;
197296ff 873 enum drbd_state_rv rv;
7a426fd8 874 struct accept_wait_data ad = {
bde89a9e 875 .connection = connection,
7a426fd8
PR
876 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
877 };
b411b363 878
bde89a9e
AG
879 clear_bit(DISCONNECT_SENT, &connection->flags);
880 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
b411b363
PR
881 return -2;
882
7da35862 883 mutex_init(&sock.mutex);
bde89a9e
AG
884 sock.sbuf = connection->data.sbuf;
885 sock.rbuf = connection->data.rbuf;
7da35862
PR
886 sock.socket = NULL;
887 mutex_init(&msock.mutex);
bde89a9e
AG
888 msock.sbuf = connection->meta.sbuf;
889 msock.rbuf = connection->meta.rbuf;
7da35862
PR
890 msock.socket = NULL;
891
0916e0e3 892 /* Assume that the peer only understands protocol 80 until we know better. */
bde89a9e 893 connection->agreed_pro_version = 80;
b411b363 894
bde89a9e 895 if (prepare_listen_socket(connection, &ad))
7a426fd8 896 return 0;
b411b363
PR
897
898 do {
2bf89621 899 struct socket *s;
b411b363 900
bde89a9e 901 s = drbd_try_connect(connection);
b411b363 902 if (s) {
7da35862
PR
903 if (!sock.socket) {
904 sock.socket = s;
bde89a9e 905 send_first_packet(connection, &sock, P_INITIAL_DATA);
7da35862 906 } else if (!msock.socket) {
bde89a9e 907 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 908 msock.socket = s;
bde89a9e 909 send_first_packet(connection, &msock, P_INITIAL_META);
b411b363 910 } else {
1ec861eb 911 drbd_err(connection, "Logic error in conn_connect()\n");
b411b363
PR
912 goto out_release_sockets;
913 }
914 }
915
7da35862
PR
916 if (sock.socket && msock.socket) {
917 rcu_read_lock();
bde89a9e 918 nc = rcu_dereference(connection->net_conf);
7da35862
PR
919 timeout = nc->ping_timeo * HZ / 10;
920 rcu_read_unlock();
921 schedule_timeout_interruptible(timeout);
922 ok = drbd_socket_okay(&sock.socket);
923 ok = drbd_socket_okay(&msock.socket) && ok;
b411b363
PR
924 if (ok)
925 break;
926 }
927
928retry:
bde89a9e 929 s = drbd_wait_for_connect(connection, &ad);
b411b363 930 if (s) {
bde89a9e 931 int fp = receive_first_packet(connection, s);
7da35862
PR
932 drbd_socket_okay(&sock.socket);
933 drbd_socket_okay(&msock.socket);
92f14951 934 switch (fp) {
e5d6f33a 935 case P_INITIAL_DATA:
7da35862 936 if (sock.socket) {
1ec861eb 937 drbd_warn(connection, "initial packet S crossed\n");
7da35862 938 sock_release(sock.socket);
80c6eed4
PR
939 sock.socket = s;
940 goto randomize;
b411b363 941 }
7da35862 942 sock.socket = s;
b411b363 943 break;
e5d6f33a 944 case P_INITIAL_META:
bde89a9e 945 set_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 946 if (msock.socket) {
1ec861eb 947 drbd_warn(connection, "initial packet M crossed\n");
7da35862 948 sock_release(msock.socket);
80c6eed4
PR
949 msock.socket = s;
950 goto randomize;
b411b363 951 }
7da35862 952 msock.socket = s;
b411b363
PR
953 break;
954 default:
1ec861eb 955 drbd_warn(connection, "Error receiving initial packet\n");
b411b363 956 sock_release(s);
80c6eed4 957randomize:
38b682b2 958 if (prandom_u32() & 1)
b411b363
PR
959 goto retry;
960 }
961 }
962
bde89a9e 963 if (connection->cstate <= C_DISCONNECTING)
b411b363
PR
964 goto out_release_sockets;
965 if (signal_pending(current)) {
966 flush_signals(current);
967 smp_rmb();
bde89a9e 968 if (get_t_state(&connection->receiver) == EXITING)
b411b363
PR
969 goto out_release_sockets;
970 }
971
b666dbf8
PR
972 ok = drbd_socket_okay(&sock.socket);
973 ok = drbd_socket_okay(&msock.socket) && ok;
974 } while (!ok);
b411b363 975
7a426fd8
PR
976 if (ad.s_listen)
977 sock_release(ad.s_listen);
b411b363 978
98683650
PR
979 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
980 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
b411b363 981
7da35862
PR
982 sock.socket->sk->sk_allocation = GFP_NOIO;
983 msock.socket->sk->sk_allocation = GFP_NOIO;
b411b363 984
7da35862
PR
985 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
986 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
b411b363 987
b411b363 988 /* NOT YET ...
bde89a9e 989 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
7da35862 990 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
6038178e 991 * first set it to the P_CONNECTION_FEATURES timeout,
b411b363 992 * which we set to 4x the configured ping_timeout. */
44ed167d 993 rcu_read_lock();
bde89a9e 994 nc = rcu_dereference(connection->net_conf);
44ed167d 995
7da35862
PR
996 sock.socket->sk->sk_sndtimeo =
997 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
b411b363 998
7da35862 999 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
44ed167d 1000 timeout = nc->timeout * HZ / 10;
08b165ba 1001 discard_my_data = nc->discard_my_data;
44ed167d 1002 rcu_read_unlock();
b411b363 1003
7da35862 1004 msock.socket->sk->sk_sndtimeo = timeout;
b411b363
PR
1005
1006 /* we don't want delays.
25985edc 1007 * we use TCP_CORK where appropriate, though */
7da35862
PR
1008 drbd_tcp_nodelay(sock.socket);
1009 drbd_tcp_nodelay(msock.socket);
b411b363 1010
bde89a9e
AG
1011 connection->data.socket = sock.socket;
1012 connection->meta.socket = msock.socket;
1013 connection->last_received = jiffies;
b411b363 1014
bde89a9e 1015 h = drbd_do_features(connection);
b411b363
PR
1016 if (h <= 0)
1017 return h;
1018
bde89a9e 1019 if (connection->cram_hmac_tfm) {
b30ab791 1020 /* drbd_request_state(device, NS(conn, WFAuth)); */
bde89a9e 1021 switch (drbd_do_auth(connection)) {
b10d96cb 1022 case -1:
1ec861eb 1023 drbd_err(connection, "Authentication of peer failed\n");
b411b363 1024 return -1;
b10d96cb 1025 case 0:
1ec861eb 1026 drbd_err(connection, "Authentication of peer failed, trying again.\n");
b10d96cb 1027 return 0;
b411b363
PR
1028 }
1029 }
1030
bde89a9e
AG
1031 connection->data.socket->sk->sk_sndtimeo = timeout;
1032 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
b411b363 1033
bde89a9e 1034 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
7e2455c1 1035 return -1;
b411b363 1036
31007745
PR
1037 /* Prevent a race between resync-handshake and
1038 * being promoted to Primary.
1039 *
1040 * Grab and release the state mutex, so we know that any current
1041 * drbd_set_role() is finished, and any incoming drbd_set_role
1042 * will see the STATE_SENT flag, and wait for it to be cleared.
1043 */
1044 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1045 mutex_lock(peer_device->device->state_mutex);
1046
bde89a9e 1047 set_bit(STATE_SENT, &connection->flags);
a1096a6e 1048
31007745
PR
1049 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1050 mutex_unlock(peer_device->device->state_mutex);
1051
c141ebda 1052 rcu_read_lock();
c06ece6b
AG
1053 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1054 struct drbd_device *device = peer_device->device;
b30ab791 1055 kref_get(&device->kref);
26ea8f92
AG
1056 rcu_read_unlock();
1057
08b165ba 1058 if (discard_my_data)
b30ab791 1059 set_bit(DISCARD_MY_DATA, &device->flags);
08b165ba 1060 else
b30ab791 1061 clear_bit(DISCARD_MY_DATA, &device->flags);
08b165ba 1062
69a22773 1063 drbd_connected(peer_device);
05a10ec7 1064 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
1065 rcu_read_lock();
1066 }
1067 rcu_read_unlock();
1068
bde89a9e
AG
1069 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1070 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1071 clear_bit(STATE_SENT, &connection->flags);
1e86ac48 1072 return 0;
a1096a6e 1073 }
1e86ac48 1074
bde89a9e 1075 drbd_thread_start(&connection->asender);
b411b363 1076
0500813f 1077 mutex_lock(&connection->resource->conf_update);
08b165ba
PR
1078 /* The discard_my_data flag is a single-shot modifier to the next
1079 * connection attempt, the handshake of which is now well underway.
1080 * No need for rcu style copying of the whole struct
1081 * just to clear a single value. */
bde89a9e 1082 connection->net_conf->discard_my_data = 0;
0500813f 1083 mutex_unlock(&connection->resource->conf_update);
08b165ba 1084
d3fcb490 1085 return h;
b411b363
PR
1086
1087out_release_sockets:
7a426fd8
PR
1088 if (ad.s_listen)
1089 sock_release(ad.s_listen);
7da35862
PR
1090 if (sock.socket)
1091 sock_release(sock.socket);
1092 if (msock.socket)
1093 sock_release(msock.socket);
b411b363
PR
1094 return -1;
1095}
1096
/* Parse a raw packet header into @pi.
 *
 * Three on-the-wire header layouts are supported; which one applies is
 * decided by the negotiated header size plus a magic value: the protocol-100
 * header (carries a volume number), the "big" 9.5 header, and the original
 * 8.0 header.  Returns 0 on success, -EINVAL on a bad magic value or
 * non-zero padding.
 */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;	/* pre-100 protocols know only a single volume */
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;	/* pre-100 protocols know only a single volume */
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	/* payload immediately follows the fixed-size header in the buffer */
	pi->data = header + header_size;
	return 0;
}
b411b363 1132
bde89a9e 1133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
257d0af6 1134{
bde89a9e 1135 void *buffer = connection->data.rbuf;
69bc7bc3 1136 int err;
257d0af6 1137
bde89a9e 1138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
a5c31904 1139 if (err)
69bc7bc3 1140 return err;
257d0af6 1141
bde89a9e
AG
1142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
b411b363 1144
69bc7bc3 1145 return err;
b411b363
PR
1146}
1147
/* Issue a cache flush to the backing device of every volume on this
 * connection, if the configured write ordering requires it.
 *
 * The kref on the device is taken so the device cannot go away while we
 * drop the RCU read lock around the (sleeping) blkdev_issue_flush() call.
 * On a flush failure we permanently downgrade the resource's write
 * ordering to "drain" and stop flushing further volumes. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* must not sleep under rcu_read_lock; the kref keeps
			 * the device alive across the unlock */
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1183
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 *
 * If the event brings @epoch to completion (non-empty, no active writes,
 * barrier number known or forced cleanup), the barrier ack is sent and the
 * epoch is either destroyed or recycled as the current epoch.  Finishing
 * one epoch may cascade to its successors in the epoch list.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* EV_CLEANUP is a modifier flag, not an event on its own */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* sending may sleep/block: drop the spinlock
				 * around the ack and re-take it afterwards */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* a finished non-current epoch is destroyed;
				 * continue with its successor in the list */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is reset and reused */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1259
8fe39aac
PR
1260static enum write_ordering_e
1261max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1262{
1263 struct disk_conf *dc;
1264
1265 dc = rcu_dereference(bdev->disk_conf);
1266
1267 if (wo == WO_bdev_flush && !dc->disk_flushes)
1268 wo = WO_drain_io;
1269 if (wo == WO_drain_io && !dc->disk_drain)
1270 wo = WO_none;
1271
1272 return wo;
1273}
1274
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource: DRBD resource.
 * @bdev: backing device whose limits must be honored even if it is not
 *	(yet) reachable via the resource's device idr; may be NULL.
 * @wo: Write ordering method to try.
 *
 * The effective method is the requested one clamped by the capabilities
 * of every attached backing device (and @bdev, if given).
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	/* only WO_bdev_flush may be re-attempted after a previous downgrade;
	 * everything else can only ever move downwards */
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already covered by this walk; avoid a second clamp */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	/* e.g. a device in the middle of attaching, not in the idr yet */
	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1314
/**
 * drbd_submit_peer_request()
 * @device: DRBD device.
 * @peer_req: peer request
 * @rw: flag field, see bio->bi_rw
 * @fault_type: fault-injection class passed to drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked list of built bios, via bi_next */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be placed into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the local list; all are submitted together below */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		/* no pages to add for a discard; just set the byte count */
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* this bio is full; start another one for the rest */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	/* endio fires once per bio; completion needs the exact count */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; drop everything we built */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1434
b30ab791 1435static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
db830c46 1436 struct drbd_peer_request *peer_req)
53840641 1437{
db830c46 1438 struct drbd_interval *i = &peer_req->i;
53840641 1439
b30ab791 1440 drbd_remove_interval(&device->write_requests, i);
53840641
AG
1441 drbd_clear_interval(i);
1442
6c852bec 1443 /* Wake up any processes waiting for this peer request to complete. */
53840641 1444 if (i->waiting)
b30ab791 1445 wake_up(&device->misc_wait);
53840641
AG
1446}
1447
/* Block until the active_ee list of every volume on @connection is empty,
 * i.e. all peer write requests currently submitted to local disk have
 * completed.  Takes a kref per device so it may sleep outside the RCU
 * read-side critical section. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		/* waiting sleeps; the kref keeps the device valid meanwhile */
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
1465
9f4fe9ad
AG
1466static struct drbd_peer_device *
1467conn_peer_device(struct drbd_connection *connection, int volume_number)
1468{
1469 return idr_find(&connection->peer_devices, volume_number);
1470}
1471
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch, try to finish it, and — depending on the write-ordering
 * method — either allocate a fresh epoch, or drain/flush pending writes
 * so the old epoch can be recycled in place. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* make the old epoch finishable by waiting out its writes */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* install the newly allocated epoch as the current one */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1539
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
/* Allocate a peer request and read its payload (and optional integrity
 * digest) off the data socket.  For P_TRIM packets no payload is read;
 * the size comes from the trim sub-header instead.  Returns NULL on any
 * receive, validation, digest, or allocation failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;	/* dgs: digest size; ds: bytes left to receive */
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		/* the digest is counted in pi->size but is not payload */
		data_size -= dgs;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* trims carry no payload to receive */
	if (trim)
		return peer_req;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (dgs) {
		/* verify the received payload against the peer's digest */
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1631
/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 *
 * Uses a single scratch page repeatedly so no payload-sized allocation
 * is needed.  Returns 0 on success or the first receive error.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		/* each chunk overwrites the same page — contents are discarded */
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}
1659
/* Receive the payload of a "disk-less" read reply directly into the pages
 * of the original request's master bio, verifying the optional integrity
 * digest.  Returns 0 on success or a negative error. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;	/* dgs: digest size; expect: bytes for this segment */
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		/* the digest is part of data_size but not of the payload */
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1707
/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: on success mark the range
 * in-sync and send P_RS_WRITE_ACK; on error record the resync failure
 * and send P_NEG_ACK.  Balances the inc_unacked() done on submit.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}
1736
/* Read a resync data block off the wire and submit it as a local write.
 * On success the e_end_resync_block() callback will complete it from
 * asender context.  Returns 0 on success, -EIO on any failure (the
 * local-disk reference taken by the caller is dropped here). */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1774
668eebc6 1775static struct drbd_request *
b30ab791 1776find_request(struct drbd_device *device, struct rb_root *root, u64 id,
bc9c5c41 1777 sector_t sector, bool missing_ok, const char *func)
51624585 1778{
51624585
AG
1779 struct drbd_request *req;
1780
bc9c5c41
AG
1781 /* Request object according to our peer */
1782 req = (struct drbd_request *)(unsigned long)id;
5e472264 1783 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1784 return req;
c3afd8f5 1785 if (!missing_ok) {
d0180171 1786 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
c3afd8f5
AG
1787 (unsigned long)id, (unsigned long long)sector);
1788 }
51624585 1789 return NULL;
b411b363
PR
1790}
1791
/* Handle a P_DATA_REPLY packet: locate the pending read request this
 * reply answers, receive its payload into the request's bio, and move
 * the request to DATA_RECEIVED.  Returns 0 on success, negative on
 * lookup or receive failure. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1826
bde89a9e 1827static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1828{
9f4fe9ad 1829 struct drbd_peer_device *peer_device;
b30ab791 1830 struct drbd_device *device;
b411b363 1831 sector_t sector;
82bc0194 1832 int err;
e658983a 1833 struct p_data *p = pi->data;
4a76b161 1834
9f4fe9ad
AG
1835 peer_device = conn_peer_device(connection, pi->vnr);
1836 if (!peer_device)
4a76b161 1837 return -EIO;
9f4fe9ad 1838 device = peer_device->device;
b411b363
PR
1839
1840 sector = be64_to_cpu(p->sector);
0b0ba1ef 1841 D_ASSERT(device, p->block_id == ID_SYNCER);
b411b363 1842
b30ab791 1843 if (get_ldev(device)) {
b411b363
PR
1844 /* data is submitted to disk within recv_resync_read.
1845 * corresponding put_ldev done below on error,
fcefa62e 1846 * or in drbd_peer_request_endio. */
a0fb3c47 1847 err = recv_resync_read(peer_device, sector, pi);
b411b363
PR
1848 } else {
1849 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1850 drbd_err(device, "Can not write resync data to local disk.\n");
b411b363 1851
69a22773 1852 err = drbd_drain_block(peer_device, pi->size);
b411b363 1853
69a22773 1854 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
b411b363
PR
1855 }
1856
b30ab791 1857 atomic_add(pi->size >> 9, &device->rs_sect_in);
778f271d 1858
82bc0194 1859 return err;
b411b363
PR
1860}
1861
b30ab791 1862static void restart_conflicting_writes(struct drbd_device *device,
7be8da07 1863 sector_t sector, int size)
b411b363 1864{
7be8da07
AG
1865 struct drbd_interval *i;
1866 struct drbd_request *req;
1867
b30ab791 1868 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
1869 if (!i->local)
1870 continue;
1871 req = container_of(i, struct drbd_request, i);
1872 if (req->rq_state & RQ_LOCAL_PENDING ||
1873 !(req->rq_state & RQ_POSTPONED))
1874 continue;
2312f0b3
LE
1875 /* as it is RQ_POSTPONED, this will cause it to
1876 * be queued on the retry workqueue. */
d4dabbe2 1877 __req_mod(req, CONFLICT_RESOLVED, NULL);
7be8da07
AG
1878 }
1879}
b411b363 1880
a990be46
AG
1881/*
1882 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1883 */
99920dc5 1884static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1885{
8050e6d0 1886 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1887 container_of(w, struct drbd_peer_request, w);
1888 struct drbd_peer_device *peer_device = peer_req->peer_device;
1889 struct drbd_device *device = peer_device->device;
db830c46 1890 sector_t sector = peer_req->i.sector;
99920dc5 1891 int err = 0, pcmd;
b411b363 1892
303d1448 1893 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1894 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791
AG
1895 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1896 device->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1897 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1898 P_RS_WRITE_ACK : P_WRITE_ACK;
a8cd15ba 1899 err = drbd_send_ack(peer_device, pcmd, peer_req);
b411b363 1900 if (pcmd == P_RS_WRITE_ACK)
b30ab791 1901 drbd_set_in_sync(device, sector, peer_req->i.size);
b411b363 1902 } else {
a8cd15ba 1903 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363
PR
1904 /* we expect it to be marked out of sync anyways...
1905 * maybe assert this? */
1906 }
b30ab791 1907 dec_unacked(device);
b411b363
PR
1908 }
1909 /* we delete from the conflict detection hash _after_ we sent out the
1910 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1911 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
0500813f 1912 spin_lock_irq(&device->resource->req_lock);
0b0ba1ef 1913 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
b30ab791 1914 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07 1915 if (peer_req->flags & EE_RESTART_REQUESTS)
b30ab791 1916 restart_conflicting_writes(device, sector, peer_req->i.size);
0500813f 1917 spin_unlock_irq(&device->resource->req_lock);
bb3bfe96 1918 } else
0b0ba1ef 1919 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1920
a6b32bc3 1921 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1922
99920dc5 1923 return err;
b411b363
PR
1924}
1925
a8cd15ba 1926static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1927{
8050e6d0 1928 struct drbd_peer_request *peer_req =
a8cd15ba
AG
1929 container_of(w, struct drbd_peer_request, w);
1930 struct drbd_peer_device *peer_device = peer_req->peer_device;
99920dc5 1931 int err;
b411b363 1932
a8cd15ba
AG
1933 err = drbd_send_ack(peer_device, ack, peer_req);
1934 dec_unacked(peer_device->device);
b411b363 1935
99920dc5 1936 return err;
b411b363
PR
1937}
1938
d4dabbe2 1939static int e_send_superseded(struct drbd_work *w, int unused)
7be8da07 1940{
a8cd15ba 1941 return e_send_ack(w, P_SUPERSEDED);
7be8da07
AG
1942}
1943
99920dc5 1944static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07 1945{
a8cd15ba
AG
1946 struct drbd_peer_request *peer_req =
1947 container_of(w, struct drbd_peer_request, w);
1948 struct drbd_connection *connection = peer_req->peer_device->connection;
7be8da07 1949
a8cd15ba 1950 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
d4dabbe2 1951 P_RETRY_WRITE : P_SUPERSEDED);
7be8da07 1952}
b411b363 1953
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * Serial-number comparison assuming 32-bit wrap-around
	 * (the same idiom as the kernel's time_after()).
	 * For 24-bit wrap-around, we would have to shift:
	 * a <<= 8; b <<= 8;
	 *
	 * Subtract in unsigned arithmetic (well-defined wrap-around) and
	 * only then reinterpret as signed.  The previous form,
	 * "(s32)a - (s32)b", performs the subtraction in signed int and
	 * can overflow (undefined behavior), e.g. for a = 0x7fffffff,
	 * b = 0x80000000.
	 */
	return (s32)(a - b) > 0;
}
b411b363 1963
3e394da1
AG
1964static u32 seq_max(u32 a, u32 b)
1965{
1966 return seq_greater(a, b) ? a : b;
b411b363
PR
1967}
1968
69a22773 1969static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
3e394da1 1970{
69a22773 1971 struct drbd_device *device = peer_device->device;
3c13b680 1972 unsigned int newest_peer_seq;
3e394da1 1973
69a22773 1974 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
b30ab791
AG
1975 spin_lock(&device->peer_seq_lock);
1976 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1977 device->peer_seq = newest_peer_seq;
1978 spin_unlock(&device->peer_seq_lock);
1979 /* wake up only if we actually changed device->peer_seq */
3c13b680 1980 if (peer_seq == newest_peer_seq)
b30ab791 1981 wake_up(&device->seq_wait);
7be8da07 1982 }
b411b363
PR
1983}
1984
/* Do the byte ranges [s1, s1+l1) and [s2, s2+l2) overlap?
 * Start sectors are 512-byte sector numbers, lengths are in bytes. */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	sector_t e1 = s1 + (l1 >> 9);	/* end sector of first range */
	sector_t e2 = s2 + (l2 >> 9);	/* end sector of second range */

	return !(e1 <= s2 || s1 >= e2);
}
b6a370ba 1989
d93f6302 1990/* maybe change sync_ee into interval trees as well? */
b30ab791 1991static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
d93f6302
LE
1992{
1993 struct drbd_peer_request *rs_req;
b6a370ba
PR
1994 bool rv = 0;
1995
0500813f 1996 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1997 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
d93f6302
LE
1998 if (overlaps(peer_req->i.sector, peer_req->i.size,
1999 rs_req->i.sector, rs_req->i.size)) {
b6a370ba
PR
2000 rv = 1;
2001 break;
2002 }
2003 }
0500813f 2004 spin_unlock_irq(&device->resource->req_lock);
b6a370ba
PR
2005
2006 return rv;
2007}
2008
b411b363
PR
2009/* Called from receive_Data.
2010 * Synchronize packets on sock with packets on msock.
2011 *
2012 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2013 * packet traveling on msock, they are still processed in the order they have
2014 * been sent.
2015 *
2016 * Note: we don't care for Ack packets overtaking P_DATA packets.
2017 *
b30ab791 2018 * In case packet_seq is larger than device->peer_seq number, there are
b411b363 2019 * outstanding packets on the msock. We wait for them to arrive.
b30ab791 2020 * In case we are the logically next packet, we update device->peer_seq
b411b363
PR
2021 * ourselves. Correctly handles 32bit wrap around.
2022 *
2023 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2024 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2025 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2026 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2027 *
2028 * returns 0 if we may process the packet,
2029 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
69a22773 2030static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
b411b363 2031{
69a22773 2032 struct drbd_device *device = peer_device->device;
b411b363 2033 DEFINE_WAIT(wait);
b411b363 2034 long timeout;
b874d231 2035 int ret = 0, tp;
7be8da07 2036
69a22773 2037 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
7be8da07
AG
2038 return 0;
2039
b30ab791 2040 spin_lock(&device->peer_seq_lock);
b411b363 2041 for (;;) {
b30ab791
AG
2042 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2043 device->peer_seq = seq_max(device->peer_seq, peer_seq);
b411b363 2044 break;
7be8da07 2045 }
b874d231 2046
b411b363
PR
2047 if (signal_pending(current)) {
2048 ret = -ERESTARTSYS;
2049 break;
2050 }
b874d231
PR
2051
2052 rcu_read_lock();
a6b32bc3 2053 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
b874d231
PR
2054 rcu_read_unlock();
2055
2056 if (!tp)
2057 break;
2058
2059 /* Only need to wait if two_primaries is enabled */
b30ab791
AG
2060 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2061 spin_unlock(&device->peer_seq_lock);
44ed167d 2062 rcu_read_lock();
69a22773 2063 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
44ed167d 2064 rcu_read_unlock();
71b1c1eb 2065 timeout = schedule_timeout(timeout);
b30ab791 2066 spin_lock(&device->peer_seq_lock);
7be8da07 2067 if (!timeout) {
b411b363 2068 ret = -ETIMEDOUT;
d0180171 2069 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
b411b363
PR
2070 break;
2071 }
2072 }
b30ab791
AG
2073 spin_unlock(&device->peer_seq_lock);
2074 finish_wait(&device->seq_wait, &wait);
b411b363
PR
2075 return ret;
2076}
2077
688593c5
LE
2078/* see also bio_flags_to_wire()
2079 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2080 * flags and back. We may replicate to other kernel versions. */
81f0ffd2 2081static unsigned long wire_flags_to_bio(u32 dpf)
76d2e7ec 2082{
688593c5
LE
2083 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2084 (dpf & DP_FUA ? REQ_FUA : 0) |
2085 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2086 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
2087}
2088
b30ab791 2089static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
7be8da07
AG
2090 unsigned int size)
2091{
2092 struct drbd_interval *i;
2093
2094 repeat:
b30ab791 2095 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2096 struct drbd_request *req;
2097 struct bio_and_error m;
2098
2099 if (!i->local)
2100 continue;
2101 req = container_of(i, struct drbd_request, i);
2102 if (!(req->rq_state & RQ_POSTPONED))
2103 continue;
2104 req->rq_state &= ~RQ_POSTPONED;
2105 __req_mod(req, NEG_ACKED, &m);
0500813f 2106 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2107 if (m.bio)
b30ab791 2108 complete_master_bio(device, &m);
0500813f 2109 spin_lock_irq(&device->resource->req_lock);
7be8da07
AG
2110 goto repeat;
2111 }
2112}
2113
b30ab791 2114static int handle_write_conflicts(struct drbd_device *device,
7be8da07
AG
2115 struct drbd_peer_request *peer_req)
2116{
e33b32de 2117 struct drbd_connection *connection = peer_req->peer_device->connection;
bde89a9e 2118 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
7be8da07
AG
2119 sector_t sector = peer_req->i.sector;
2120 const unsigned int size = peer_req->i.size;
2121 struct drbd_interval *i;
2122 bool equal;
2123 int err;
2124
2125 /*
2126 * Inserting the peer request into the write_requests tree will prevent
2127 * new conflicting local requests from being added.
2128 */
b30ab791 2129 drbd_insert_interval(&device->write_requests, &peer_req->i);
7be8da07
AG
2130
2131 repeat:
b30ab791 2132 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
7be8da07
AG
2133 if (i == &peer_req->i)
2134 continue;
2135
2136 if (!i->local) {
2137 /*
2138 * Our peer has sent a conflicting remote request; this
2139 * should not happen in a two-node setup. Wait for the
2140 * earlier peer request to complete.
2141 */
b30ab791 2142 err = drbd_wait_misc(device, i);
7be8da07
AG
2143 if (err)
2144 goto out;
2145 goto repeat;
2146 }
2147
2148 equal = i->sector == sector && i->size == size;
2149 if (resolve_conflicts) {
2150 /*
2151 * If the peer request is fully contained within the
d4dabbe2
LE
2152 * overlapping request, it can be considered overwritten
2153 * and thus superseded; otherwise, it will be retried
2154 * once all overlapping requests have completed.
7be8da07 2155 */
d4dabbe2 2156 bool superseded = i->sector <= sector && i->sector +
7be8da07
AG
2157 (i->size >> 9) >= sector + (size >> 9);
2158
2159 if (!equal)
d0180171 2160 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2161 "local=%llus +%u, remote=%llus +%u, "
2162 "assuming %s came first\n",
2163 (unsigned long long)i->sector, i->size,
2164 (unsigned long long)sector, size,
d4dabbe2 2165 superseded ? "local" : "remote");
7be8da07 2166
b30ab791 2167 inc_unacked(device);
a8cd15ba 2168 peer_req->w.cb = superseded ? e_send_superseded :
7be8da07 2169 e_send_retry_write;
a8cd15ba 2170 list_add_tail(&peer_req->w.list, &device->done_ee);
e33b32de 2171 wake_asender(connection);
7be8da07
AG
2172
2173 err = -ENOENT;
2174 goto out;
2175 } else {
2176 struct drbd_request *req =
2177 container_of(i, struct drbd_request, i);
2178
2179 if (!equal)
d0180171 2180 drbd_alert(device, "Concurrent writes detected: "
7be8da07
AG
2181 "local=%llus +%u, remote=%llus +%u\n",
2182 (unsigned long long)i->sector, i->size,
2183 (unsigned long long)sector, size);
2184
2185 if (req->rq_state & RQ_LOCAL_PENDING ||
2186 !(req->rq_state & RQ_POSTPONED)) {
2187 /*
2188 * Wait for the node with the discard flag to
d4dabbe2
LE
2189 * decide if this request has been superseded
2190 * or needs to be retried.
2191 * Requests that have been superseded will
7be8da07
AG
2192 * disappear from the write_requests tree.
2193 *
2194 * In addition, wait for the conflicting
2195 * request to finish locally before submitting
2196 * the conflicting peer request.
2197 */
b30ab791 2198 err = drbd_wait_misc(device, &req->i);
7be8da07 2199 if (err) {
e33b32de 2200 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
b30ab791 2201 fail_postponed_requests(device, sector, size);
7be8da07
AG
2202 goto out;
2203 }
2204 goto repeat;
2205 }
2206 /*
2207 * Remember to restart the conflicting requests after
2208 * the new peer request has completed.
2209 */
2210 peer_req->flags |= EE_RESTART_REQUESTS;
2211 }
2212 }
2213 err = 0;
2214
2215 out:
2216 if (err)
b30ab791 2217 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07
AG
2218 return err;
2219}
2220
b411b363 2221/* mirrored write */
bde89a9e 2222static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2223{
9f4fe9ad 2224 struct drbd_peer_device *peer_device;
b30ab791 2225 struct drbd_device *device;
b411b363 2226 sector_t sector;
db830c46 2227 struct drbd_peer_request *peer_req;
e658983a 2228 struct p_data *p = pi->data;
7be8da07 2229 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
2230 int rw = WRITE;
2231 u32 dp_flags;
302bdeae 2232 int err, tp;
b411b363 2233
9f4fe9ad
AG
2234 peer_device = conn_peer_device(connection, pi->vnr);
2235 if (!peer_device)
4a76b161 2236 return -EIO;
9f4fe9ad 2237 device = peer_device->device;
b411b363 2238
b30ab791 2239 if (!get_ldev(device)) {
82bc0194
AG
2240 int err2;
2241
69a22773
AG
2242 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2243 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
bde89a9e 2244 atomic_inc(&connection->current_epoch->epoch_size);
69a22773 2245 err2 = drbd_drain_block(peer_device, pi->size);
82bc0194
AG
2246 if (!err)
2247 err = err2;
2248 return err;
b411b363
PR
2249 }
2250
fcefa62e
AG
2251 /*
2252 * Corresponding put_ldev done either below (on various errors), or in
2253 * drbd_peer_request_endio, if we successfully submit the data at the
2254 * end of this function.
2255 */
b411b363
PR
2256
2257 sector = be64_to_cpu(p->sector);
a0fb3c47 2258 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
db830c46 2259 if (!peer_req) {
b30ab791 2260 put_ldev(device);
82bc0194 2261 return -EIO;
b411b363
PR
2262 }
2263
a8cd15ba 2264 peer_req->w.cb = e_end_block;
b411b363 2265
688593c5 2266 dp_flags = be32_to_cpu(p->dp_flags);
81f0ffd2 2267 rw |= wire_flags_to_bio(dp_flags);
a0fb3c47
LE
2268 if (pi->cmd == P_TRIM) {
2269 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2270 peer_req->flags |= EE_IS_TRIM;
2271 if (!blk_queue_discard(q))
2272 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2273 D_ASSERT(peer_device, peer_req->i.size > 0);
2274 D_ASSERT(peer_device, rw & REQ_DISCARD);
2275 D_ASSERT(peer_device, peer_req->pages == NULL);
2276 } else if (peer_req->pages == NULL) {
0b0ba1ef
AG
2277 D_ASSERT(device, peer_req->i.size == 0);
2278 D_ASSERT(device, dp_flags & DP_FLUSH);
a73ff323 2279 }
688593c5
LE
2280
2281 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2282 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2283
bde89a9e
AG
2284 spin_lock(&connection->epoch_lock);
2285 peer_req->epoch = connection->current_epoch;
db830c46
AG
2286 atomic_inc(&peer_req->epoch->epoch_size);
2287 atomic_inc(&peer_req->epoch->active);
bde89a9e 2288 spin_unlock(&connection->epoch_lock);
b411b363 2289
302bdeae 2290 rcu_read_lock();
9f4fe9ad 2291 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
302bdeae
PR
2292 rcu_read_unlock();
2293 if (tp) {
2294 peer_req->flags |= EE_IN_INTERVAL_TREE;
69a22773 2295 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
7be8da07 2296 if (err)
b411b363 2297 goto out_interrupted;
0500813f 2298 spin_lock_irq(&device->resource->req_lock);
b30ab791 2299 err = handle_write_conflicts(device, peer_req);
7be8da07 2300 if (err) {
0500813f 2301 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2302 if (err == -ENOENT) {
b30ab791 2303 put_ldev(device);
82bc0194 2304 return 0;
b411b363 2305 }
7be8da07 2306 goto out_interrupted;
b411b363 2307 }
b874d231 2308 } else {
69a22773 2309 update_peer_seq(peer_device, peer_seq);
0500813f 2310 spin_lock_irq(&device->resource->req_lock);
b874d231 2311 }
a0fb3c47
LE
2312 /* if we use the zeroout fallback code, we process synchronously
2313 * and we wait for all pending requests, respectively wait for
2314 * active_ee to become empty in drbd_submit_peer_request();
2315 * better not add ourselves here. */
2316 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2317 list_add(&peer_req->w.list, &device->active_ee);
0500813f 2318 spin_unlock_irq(&device->resource->req_lock);
b411b363 2319
b30ab791
AG
2320 if (device->state.conn == C_SYNC_TARGET)
2321 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
b411b363 2322
9f4fe9ad 2323 if (peer_device->connection->agreed_pro_version < 100) {
44ed167d 2324 rcu_read_lock();
9f4fe9ad 2325 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
303d1448
PR
2326 case DRBD_PROT_C:
2327 dp_flags |= DP_SEND_WRITE_ACK;
2328 break;
2329 case DRBD_PROT_B:
2330 dp_flags |= DP_SEND_RECEIVE_ACK;
2331 break;
b411b363 2332 }
44ed167d 2333 rcu_read_unlock();
b411b363
PR
2334 }
2335
303d1448
PR
2336 if (dp_flags & DP_SEND_WRITE_ACK) {
2337 peer_req->flags |= EE_SEND_WRITE_ACK;
b30ab791 2338 inc_unacked(device);
b411b363
PR
2339 /* corresponding dec_unacked() in e_end_block()
2340 * respective _drbd_clear_done_ee */
303d1448
PR
2341 }
2342
2343 if (dp_flags & DP_SEND_RECEIVE_ACK) {
b411b363
PR
2344 /* I really don't like it that the receiver thread
2345 * sends on the msock, but anyways */
69a22773 2346 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
b411b363
PR
2347 }
2348
b30ab791 2349 if (device->state.pdsk < D_INCONSISTENT) {
b411b363 2350 /* In case we have the only disk of the cluster, */
b30ab791 2351 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
db830c46
AG
2352 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2353 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
4dd726f0 2354 drbd_al_begin_io(device, &peer_req->i);
b411b363
PR
2355 }
2356
b30ab791 2357 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
82bc0194
AG
2358 if (!err)
2359 return 0;
b411b363 2360
10f6d992 2361 /* don't care for the reason here */
d0180171 2362 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2363 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2364 list_del(&peer_req->w.list);
b30ab791 2365 drbd_remove_epoch_entry_interval(device, peer_req);
0500813f 2366 spin_unlock_irq(&device->resource->req_lock);
db830c46 2367 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
b30ab791 2368 drbd_al_complete_io(device, &peer_req->i);
22cc37a9 2369
b411b363 2370out_interrupted:
bde89a9e 2371 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
b30ab791
AG
2372 put_ldev(device);
2373 drbd_free_peer_req(device, peer_req);
82bc0194 2374 return err;
b411b363
PR
2375}
2376
0f0601f4
LE
2377/* We may throttle resync, if the lower device seems to be busy,
2378 * and current sync rate is above c_min_rate.
2379 *
2380 * To decide whether or not the lower device is busy, we use a scheme similar
2381 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2382 * (more than 64 sectors) of activity we cannot account for with our own resync
2383 * activity, it obviously is "busy".
2384 *
2385 * The current sync rate used here uses only the most recent two step marks,
2386 * to have a short time average so we can react faster.
2387 */
e8299874 2388bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
0f0601f4 2389{
e3555d85 2390 struct lc_element *tmp;
e8299874 2391 bool throttle = true;
daeda1cc 2392
e8299874
LE
2393 if (!drbd_rs_c_min_rate_throttle(device))
2394 return false;
0f0601f4 2395
b30ab791
AG
2396 spin_lock_irq(&device->al_lock);
2397 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
e3555d85
PR
2398 if (tmp) {
2399 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
e8299874
LE
2400 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2401 throttle = false;
e3555d85
PR
2402 /* Do not slow down if app IO is already waiting for this extent */
2403 }
b30ab791 2404 spin_unlock_irq(&device->al_lock);
e3555d85 2405
e8299874
LE
2406 return throttle;
2407}
2408
2409bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2410{
2411 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
2412 unsigned long db, dt, dbdt;
2413 unsigned int c_min_rate;
2414 int curr_events;
2415
2416 rcu_read_lock();
2417 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2418 rcu_read_unlock();
2419
2420 /* feature disabled? */
2421 if (c_min_rate == 0)
2422 return false;
2423
0f0601f4
LE
2424 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2425 (int)part_stat_read(&disk->part0, sectors[1]) -
b30ab791 2426 atomic_read(&device->rs_sect_ev);
b30ab791 2427 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
0f0601f4
LE
2428 unsigned long rs_left;
2429 int i;
2430
b30ab791 2431 device->rs_last_events = curr_events;
0f0601f4
LE
2432
2433 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2434 * approx. */
b30ab791 2435 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2649f080 2436
b30ab791
AG
2437 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2438 rs_left = device->ov_left;
2649f080 2439 else
b30ab791 2440 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
0f0601f4 2441
b30ab791 2442 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
0f0601f4
LE
2443 if (!dt)
2444 dt++;
b30ab791 2445 db = device->rs_mark_left[i] - rs_left;
0f0601f4
LE
2446 dbdt = Bit2KB(db/dt);
2447
daeda1cc 2448 if (dbdt > c_min_rate)
e8299874 2449 return true;
0f0601f4 2450 }
e8299874 2451 return false;
0f0601f4
LE
2452}
2453
bde89a9e 2454static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2455{
9f4fe9ad 2456 struct drbd_peer_device *peer_device;
b30ab791 2457 struct drbd_device *device;
b411b363 2458 sector_t sector;
4a76b161 2459 sector_t capacity;
db830c46 2460 struct drbd_peer_request *peer_req;
b411b363 2461 struct digest_info *di = NULL;
b18b37be 2462 int size, verb;
b411b363 2463 unsigned int fault_type;
e658983a 2464 struct p_block_req *p = pi->data;
4a76b161 2465
9f4fe9ad
AG
2466 peer_device = conn_peer_device(connection, pi->vnr);
2467 if (!peer_device)
4a76b161 2468 return -EIO;
9f4fe9ad 2469 device = peer_device->device;
b30ab791 2470 capacity = drbd_get_capacity(device->this_bdev);
b411b363
PR
2471
2472 sector = be64_to_cpu(p->sector);
2473 size = be32_to_cpu(p->blksize);
2474
c670a398 2475 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
d0180171 2476 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2477 (unsigned long long)sector, size);
82bc0194 2478 return -EINVAL;
b411b363
PR
2479 }
2480 if (sector + (size>>9) > capacity) {
d0180171 2481 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2482 (unsigned long long)sector, size);
82bc0194 2483 return -EINVAL;
b411b363
PR
2484 }
2485
b30ab791 2486 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
b18b37be 2487 verb = 1;
e2857216 2488 switch (pi->cmd) {
b18b37be 2489 case P_DATA_REQUEST:
69a22773 2490 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
b18b37be
PR
2491 break;
2492 case P_RS_DATA_REQUEST:
2493 case P_CSUM_RS_REQUEST:
2494 case P_OV_REQUEST:
69a22773 2495 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
b18b37be
PR
2496 break;
2497 case P_OV_REPLY:
2498 verb = 0;
b30ab791 2499 dec_rs_pending(device);
69a22773 2500 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
b18b37be
PR
2501 break;
2502 default:
49ba9b1b 2503 BUG();
b18b37be
PR
2504 }
2505 if (verb && __ratelimit(&drbd_ratelimit_state))
d0180171 2506 drbd_err(device, "Can not satisfy peer's read request, "
b411b363 2507 "no local data.\n");
b18b37be 2508
a821cc4a 2509 /* drain possibly payload */
69a22773 2510 return drbd_drain_block(peer_device, pi->size);
b411b363
PR
2511 }
2512
2513 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2514 * "criss-cross" setup, that might cause write-out on some other DRBD,
2515 * which in turn might block on the other node at this very place. */
a0fb3c47
LE
2516 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2517 true /* has real payload */, GFP_NOIO);
db830c46 2518 if (!peer_req) {
b30ab791 2519 put_ldev(device);
82bc0194 2520 return -ENOMEM;
b411b363
PR
2521 }
2522
e2857216 2523 switch (pi->cmd) {
b411b363 2524 case P_DATA_REQUEST:
a8cd15ba 2525 peer_req->w.cb = w_e_end_data_req;
b411b363 2526 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2527 /* application IO, don't drbd_rs_begin_io */
2528 goto submit;
2529
b411b363 2530 case P_RS_DATA_REQUEST:
a8cd15ba 2531 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2532 fault_type = DRBD_FAULT_RS_RD;
5f9915bb 2533 /* used in the sector offset progress display */
b30ab791 2534 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2535 break;
2536
2537 case P_OV_REPLY:
2538 case P_CSUM_RS_REQUEST:
2539 fault_type = DRBD_FAULT_RS_RD;
e2857216 2540 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
b411b363
PR
2541 if (!di)
2542 goto out_free_e;
2543
e2857216 2544 di->digest_size = pi->size;
b411b363
PR
2545 di->digest = (((char *)di)+sizeof(struct digest_info));
2546
db830c46
AG
2547 peer_req->digest = di;
2548 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2549
9f4fe9ad 2550 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
b411b363
PR
2551 goto out_free_e;
2552
e2857216 2553 if (pi->cmd == P_CSUM_RS_REQUEST) {
9f4fe9ad 2554 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
a8cd15ba 2555 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb 2556 /* used in the sector offset progress display */
b30ab791 2557 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
e2857216 2558 } else if (pi->cmd == P_OV_REPLY) {
2649f080 2559 /* track progress, we may need to throttle */
b30ab791 2560 atomic_add(size >> 9, &device->rs_sect_in);
a8cd15ba 2561 peer_req->w.cb = w_e_end_ov_reply;
b30ab791 2562 dec_rs_pending(device);
0f0601f4
LE
2563 /* drbd_rs_begin_io done when we sent this request,
2564 * but accounting still needs to be done. */
2565 goto submit_for_resync;
b411b363
PR
2566 }
2567 break;
2568
2569 case P_OV_REQUEST:
b30ab791 2570 if (device->ov_start_sector == ~(sector_t)0 &&
9f4fe9ad 2571 peer_device->connection->agreed_pro_version >= 90) {
de228bba
LE
2572 unsigned long now = jiffies;
2573 int i;
b30ab791
AG
2574 device->ov_start_sector = sector;
2575 device->ov_position = sector;
2576 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2577 device->rs_total = device->ov_left;
de228bba 2578 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
b30ab791
AG
2579 device->rs_mark_left[i] = device->ov_left;
2580 device->rs_mark_time[i] = now;
de228bba 2581 }
d0180171 2582 drbd_info(device, "Online Verify start sector: %llu\n",
b411b363
PR
2583 (unsigned long long)sector);
2584 }
a8cd15ba 2585 peer_req->w.cb = w_e_end_ov_req;
b411b363 2586 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2587 break;
2588
b411b363 2589 default:
49ba9b1b 2590 BUG();
b411b363
PR
2591 }
2592
0f0601f4
LE
2593 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2594 * wrt the receiver, but it is not as straightforward as it may seem.
2595 * Various places in the resync start and stop logic assume resync
2596 * requests are processed in order, requeuing this on the worker thread
2597 * introduces a bunch of new code for synchronization between threads.
2598 *
2599 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2600 * "forever", throttling after drbd_rs_begin_io will lock that extent
2601 * for application writes for the same time. For now, just throttle
2602 * here, where the rest of the code expects the receiver to sleep for
2603 * a while, anyways.
2604 */
2605
2606 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2607 * this defers syncer requests for some time, before letting at least
2608 * on request through. The resync controller on the receiving side
2609 * will adapt to the incoming rate accordingly.
2610 *
2611 * We cannot throttle here if remote is Primary/SyncTarget:
2612 * we would also throttle its application reads.
2613 * In that case, throttling is done on the SyncTarget only.
2614 */
b30ab791 2615 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
e3555d85 2616 schedule_timeout_uninterruptible(HZ/10);
b30ab791 2617 if (drbd_rs_begin_io(device, sector))
80a40e43 2618 goto out_free_e;
b411b363 2619
0f0601f4 2620submit_for_resync:
b30ab791 2621 atomic_add(size >> 9, &device->rs_sect_ev);
0f0601f4 2622
80a40e43 2623submit:
b30ab791 2624 inc_unacked(device);
0500813f 2625 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2626 list_add_tail(&peer_req->w.list, &device->read_ee);
0500813f 2627 spin_unlock_irq(&device->resource->req_lock);
b411b363 2628
b30ab791 2629 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
82bc0194 2630 return 0;
b411b363 2631
10f6d992 2632 /* don't care for the reason here */
d0180171 2633 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2634 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2635 list_del(&peer_req->w.list);
0500813f 2636 spin_unlock_irq(&device->resource->req_lock);
22cc37a9
LE
2637 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2638
b411b363 2639out_free_e:
b30ab791
AG
2640 put_ldev(device);
2641 drbd_free_peer_req(device, peer_req);
82bc0194 2642 return -EIO;
b411b363
PR
2643}
2644
/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy (read from net_conf under RCU)
 * to pick a sync direction after a detected split brain.
 *
 * Return: -1 to sync from the peer, 1 to sync from this node, or -100 when
 * the policy yields no decision (caller treats this as "still split-brained").
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* Lowest bit of the bitmap UUID; presumably encodes "was primary
	 * at crash time" for the younger/older-primary policies — confirm
	 * against drbd_uuid_set/bm handling before relying on it. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* Amount of changed data on each side, used by the
	 * least-changes / zero-changes strategies. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* These policies only make sense with at least one primary;
		 * they must not be configured as after-sb-0pri. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Both sides unchanged: break the tie with the
			 * RESOLVE_CONFLICTS flag so both nodes agree. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* discard-least-changes was reached via fallthrough */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
2725
/**
 * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 *
 * Applies the after-sb-1pri policy. Several policies delegate the basic
 * decision to drbd_asb_recover_0p() and then veto or enforce it depending
 * on the local role.
 *
 * Return: -1 (sync from peer), 1 (sync from this node), or -100 (no decision).
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		/* 0-primary-only policies; invalid for after-sb-1pri. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* Accept the 0p verdict only when it makes the current
		 * secondary the sync target (resp. primary the source). */
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1  && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				/* Could not demote ourselves: escalate to the
				 * pri-lost-after-sb user-space helper; rv stays -100. */
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
2782
/**
 * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
 *
 * Applies the after-sb-2pri policy. With two primaries the only automatic
 * resolutions are "violently" (take the 0p verdict) or demoting ourselves
 * via the call-helper path.
 *
 * Return: -1 (sync from peer), 1 (sync from this node), or -100 (no decision).
 */
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_2p;

	rcu_read_lock();
	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
	rcu_read_unlock();
	switch (after_sb_2p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_DISCARD_ZERO_CHG:
		/* Not meaningful with two primaries. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				/* Demotion failed: hand the conflict to the
				 * pri-lost-after-sb helper; rv stays -100. */
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
2832
b30ab791 2833static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
b411b363
PR
2834 u64 bits, u64 flags)
2835{
2836 if (!uuid) {
d0180171 2837 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
b411b363
PR
2838 return;
2839 }
d0180171 2840 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
b411b363
PR
2841 text,
2842 (unsigned long long)uuid[UI_CURRENT],
2843 (unsigned long long)uuid[UI_BITMAP],
2844 (unsigned long long)uuid[UI_HISTORY_START],
2845 (unsigned long long)uuid[UI_HISTORY_END],
2846 (unsigned long long)bits,
2847 (unsigned long long)flags);
2848}
2849
/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091   requires proto 91
-1096   requires proto 96
 */
/* Compare our UUID set against the peer's (device->p_uuid) rule by rule,
 * returning a sync verdict from the table above and reporting which rule
 * fired via *rule_nr.  Rules 34-37 and 51/71 also *repair* UUID state when
 * a resync-finished event or the last P_SYNC_UUID packet was missed.
 * Caller holds the uuid_lock (see drbd_sync_handshake). */
static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* Bit 0 of a UUID is a role flag, not identity: mask it off before
	 * comparing. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides brand new -> nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh -> full sync from peer. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh -> full sync to peer. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* Rules 34/36: we were SyncSource and missed the
		 * resync-finished event; fix up our own UUIDs. */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Rules 35/37: mirror image — peer was SyncTarget and missed
		 * the event; fix up our copy of the peer's UUIDs. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary: tie-break deterministically so
			 * both nodes reach the same conclusion. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current == peer's bitmap UUID -> we are the target. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current matches peer history: the last P_SYNC_UUID
	 * was lost; undo the peer-side resync-start bookkeeping. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current appears in the peer's history -> we diverged;
	 * become full sync target. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rule 70: our bitmap UUID == peer's current -> we are the source. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* Rule 71: mirror image of rule 51, repairing our own UUIDs. */
	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Rule 80: peer's current is in our history -> peer diverged;
	 * become full sync source. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: both bitmap UUIDs equal and non-zero -> split brain,
	 * try automatic recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any common history entry -> split brain, no auto
	 * recovery; disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the two UUID sets at all. */
	return -1000;
}
3045
/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
 */
/* Decide the post-connect resync direction from the UUID comparison,
 * disk states, split-brain recovery policies and dry-run/tentative flags.
 * Returns C_CONNECTED, C_WF_BITMAP_S, C_WF_BITMAP_T, or C_MASK on any
 * unresolvable situation (unrelated data, unresolved split brain, ...). */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* uuid_lock protects the UUID arrays while we dump, compare and
	 * possibly repair them (rules 34-37, 51, 71). */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		/* -1091/-1096 encode "needs protocol >= 91/96". */
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* An inconsistent disk on exactly one side overrides the UUID
	 * verdict: the consistent side must be the source. */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;	/* keep the "full sync" magnitude */
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	/* Split brain: try the automatic policies, keyed by how many
	 * primaries are involved. */
	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: let an explicit --discard-my-data decide,
	 * but only if exactly one side set it. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}
	/* Copy out what we still need before dropping the RCU read lock. */
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run connect: report what would happen, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| == 2 means full sync: mark the whole bitmap out of sync. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3204
f179d76d 3205static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3206{
3207 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3208 if (peer == ASB_DISCARD_REMOTE)
3209 return ASB_DISCARD_LOCAL;
b411b363
PR
3210
3211 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3212 if (peer == ASB_DISCARD_LOCAL)
3213 return ASB_DISCARD_REMOTE;
b411b363
PR
3214
3215 /* everything else is valid if they are equal on both sides. */
f179d76d 3216 return peer;
b411b363
PR
3217}
3218
/* Handle an incoming P_PROTOCOL (or P_PROTOCOL_UPDATE) packet: verify the
 * peer's wire settings match ours, then install the peer's data-integrity
 * algorithm and a fresh net_conf via RCU.
 * Returns 0 on success, a negative error otherwise; on any mismatch the
 * connection is moved to C_DISCONNECTING and -EIO is returned. */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* All on-the-wire fields are big-endian. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the packet carries the integrity-alg name as
	 * trailing payload. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;	/* force NUL termination */
	}

	/* For the initial P_PROTOCOL (not an update) every setting must
	 * match our local configuration, checked under the RCU read lock. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* discard-my-data may be set on at most one side. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Scratch digest buffers sized for the chosen hash. */
		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Publish the updated net_conf: copy-modify-rcu_assign under both
	 * the data mutex and conf_update. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity tfm/buffers, freeing the old ones. */
	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for readers of old_net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3368
3369/* helper function
3370 * input: alg name, feature name
3371 * return: NULL (alg name was "")
3372 * ERR_PTR(error) if something goes wrong
3373 * or the crypto hash ptr, if it worked out ok. */
f63e631a 3374static
b30ab791 3375struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
b411b363
PR
3376 const char *alg, const char *name)
3377{
3378 struct crypto_hash *tfm;
3379
3380 if (!alg[0])
3381 return NULL;
3382
3383 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3384 if (IS_ERR(tfm)) {
d0180171 3385 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
b411b363
PR
3386 alg, name, PTR_ERR(tfm));
3387 return tfm;
3388 }
b411b363
PR
3389 return tfm;
3390}
3391
bde89a9e 3392static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
4a76b161 3393{
bde89a9e 3394 void *buffer = connection->data.rbuf;
4a76b161
AG
3395 int size = pi->size;
3396
3397 while (size) {
3398 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
bde89a9e 3399 s = drbd_recv(connection, buffer, s);
4a76b161
AG
3400 if (s <= 0) {
3401 if (s < 0)
3402 return s;
3403 break;
3404 }
3405 size -= s;
3406 }
3407 if (size)
3408 return -EIO;
3409 return 0;
3410}
3411
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Consume the packet's payload so the stream stays in sync. */
	return ignore_remaining_packet(connection, pi);
}
3429
/*
 * Handle a SyncParam packet from the peer: update resync rate, the
 * verify-alg/csums-alg digests and (apv >= 95) the dynamic resync
 * controller settings.  New net_conf/disk_conf/fifo objects are built,
 * published via rcu_assign_pointer() under resource->conf_update, and the
 * old ones are freed only after synchronize_rcu().
 *
 * Returns 0 on success, a negative error code on failure.
 */
static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	/* the on-the-wire layout of this packet grew with the protocol version */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg name as trailing payload */
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					"peer wants %u, accepting only up to %u byte\n",
					data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			/* changing the algorithm is only allowed while (re)establishing */
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94 && new_disk_conf) {
			/* dynamic resync controller parameters (apv >= 95) */
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed");
					put_ldev(device);
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	/* wait for all readers of the old objects before freeing them */
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3641
b411b363 3642/* warn if the arguments differ by more than 12.5% */
b30ab791 3643static void warn_if_differ_considerably(struct drbd_device *device,
b411b363
PR
3644 const char *s, sector_t a, sector_t b)
3645{
3646 sector_t d;
3647 if (a == 0 || b == 0)
3648 return;
3649 d = (a > b) ? (a - b) : (b - a);
3650 if (d > (a>>3) || d > (b>>3))
d0180171 3651 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
b411b363
PR
3652 (unsigned long long)a, (unsigned long long)b);
3653}
3654
bde89a9e 3655static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3656{
9f4fe9ad 3657 struct drbd_peer_device *peer_device;
b30ab791 3658 struct drbd_device *device;
e658983a 3659 struct p_sizes *p = pi->data;
e96c9633 3660 enum determine_dev_size dd = DS_UNCHANGED;
b411b363
PR
3661 sector_t p_size, p_usize, my_usize;
3662 int ldsc = 0; /* local disk size changed */
e89b591c 3663 enum dds_flags ddsf;
b411b363 3664
9f4fe9ad
AG
3665 peer_device = conn_peer_device(connection, pi->vnr);
3666 if (!peer_device)
bde89a9e 3667 return config_unknown_volume(connection, pi);
9f4fe9ad 3668 device = peer_device->device;
4a76b161 3669
b411b363
PR
3670 p_size = be64_to_cpu(p->d_size);
3671 p_usize = be64_to_cpu(p->u_size);
3672
b411b363
PR
3673 /* just store the peer's disk size for now.
3674 * we still need to figure out whether we accept that. */
b30ab791 3675 device->p_size = p_size;
b411b363 3676
b30ab791 3677 if (get_ldev(device)) {
daeda1cc 3678 rcu_read_lock();
b30ab791 3679 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
daeda1cc
PR
3680 rcu_read_unlock();
3681
b30ab791
AG
3682 warn_if_differ_considerably(device, "lower level device sizes",
3683 p_size, drbd_get_max_capacity(device->ldev));
3684 warn_if_differ_considerably(device, "user requested size",
daeda1cc 3685 p_usize, my_usize);
b411b363
PR
3686
3687 /* if this is the first connect, or an otherwise expected
3688 * param exchange, choose the minimum */
b30ab791 3689 if (device->state.conn == C_WF_REPORT_PARAMS)
daeda1cc 3690 p_usize = min_not_zero(my_usize, p_usize);
b411b363
PR
3691
3692 /* Never shrink a device with usable data during connect.
3693 But allow online shrinking if we are connected. */
b30ab791
AG
3694 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3695 drbd_get_capacity(device->this_bdev) &&
3696 device->state.disk >= D_OUTDATED &&
3697 device->state.conn < C_CONNECTED) {
d0180171 3698 drbd_err(device, "The peer's disk size is too small!\n");
9f4fe9ad 3699 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
b30ab791 3700 put_ldev(device);
82bc0194 3701 return -EIO;
b411b363 3702 }
daeda1cc
PR
3703
3704 if (my_usize != p_usize) {
3705 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3706
3707 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3708 if (!new_disk_conf) {
d0180171 3709 drbd_err(device, "Allocation of new disk_conf failed\n");
b30ab791 3710 put_ldev(device);
daeda1cc
PR
3711 return -ENOMEM;
3712 }
3713
0500813f 3714 mutex_lock(&connection->resource->conf_update);
b30ab791 3715 old_disk_conf = device->ldev->disk_conf;
daeda1cc
PR
3716 *new_disk_conf = *old_disk_conf;
3717 new_disk_conf->disk_size = p_usize;
3718
b30ab791 3719 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
0500813f 3720 mutex_unlock(&connection->resource->conf_update);
daeda1cc
PR
3721 synchronize_rcu();
3722 kfree(old_disk_conf);
3723
d0180171 3724 drbd_info(device, "Peer sets u_size to %lu sectors\n",
daeda1cc 3725 (unsigned long)my_usize);
b411b363 3726 }
daeda1cc 3727
b30ab791 3728 put_ldev(device);
b411b363 3729 }
b411b363 3730
20c68fde 3731 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
20c68fde
LE
3732 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3733 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3734 drbd_reconsider_max_bio_size(), we can be sure that after
3735 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3736
e89b591c 3737 ddsf = be16_to_cpu(p->dds_flags);
b30ab791 3738 if (get_ldev(device)) {
8fe39aac 3739 drbd_reconsider_max_bio_size(device, device->ldev);
b30ab791
AG
3740 dd = drbd_determine_dev_size(device, ddsf, NULL);
3741 put_ldev(device);
e96c9633 3742 if (dd == DS_ERROR)
82bc0194 3743 return -EIO;
b30ab791 3744 drbd_md_sync(device);
b411b363
PR
3745 } else {
3746 /* I am diskless, need to accept the peer's size. */
8fe39aac 3747 drbd_reconsider_max_bio_size(device, NULL);
b30ab791 3748 drbd_set_my_capacity(device, p_size);
b411b363
PR
3749 }
3750
b30ab791
AG
3751 if (get_ldev(device)) {
3752 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3753 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
b411b363
PR
3754 ldsc = 1;
3755 }
3756
b30ab791 3757 put_ldev(device);
b411b363
PR
3758 }
3759
b30ab791 3760 if (device->state.conn > C_WF_REPORT_PARAMS) {
b411b363 3761 if (be64_to_cpu(p->c_size) !=
b30ab791 3762 drbd_get_capacity(device->this_bdev) || ldsc) {
b411b363
PR
3763 /* we have different sizes, probably peer
3764 * needs to know my new size... */
69a22773 3765 drbd_send_sizes(peer_device, 0, ddsf);
b411b363 3766 }
b30ab791
AG
3767 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3768 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3769 if (device->state.pdsk >= D_INCONSISTENT &&
3770 device->state.disk >= D_INCONSISTENT) {
e89b591c 3771 if (ddsf & DDSF_NO_RESYNC)
d0180171 3772 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
e89b591c 3773 else
b30ab791 3774 resync_after_online_grow(device);
e89b591c 3775 } else
b30ab791 3776 set_bit(RESYNC_AFTER_NEG, &device->flags);
b411b363
PR
3777 }
3778 }
3779
82bc0194 3780 return 0;
b411b363
PR
3781}
3782
bde89a9e 3783static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
b411b363 3784{
9f4fe9ad 3785 struct drbd_peer_device *peer_device;
b30ab791 3786 struct drbd_device *device;
e658983a 3787 struct p_uuids *p = pi->data;
b411b363 3788 u64 *p_uuid;
62b0da3a 3789 int i, updated_uuids = 0;
b411b363 3790
9f4fe9ad
AG
3791 peer_device = conn_peer_device(connection, pi->vnr);
3792 if (!peer_device)
bde89a9e 3793 return config_unknown_volume(connection, pi);
9f4fe9ad 3794 device = peer_device->device;
4a76b161 3795
b411b363 3796 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
063eacf8 3797 if (!p_uuid) {
d0180171 3798 drbd_err(device, "kmalloc of p_uuid failed\n");
063eacf8
JW
3799 return false;
3800 }
b411b363
PR
3801
3802 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3803 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3804
b30ab791
AG
3805 kfree(device->p_uuid);
3806 device->p_uuid = p_uuid;
b411b363 3807
b30ab791
AG
3808 if (device->state.conn < C_CONNECTED &&
3809 device->state.disk < D_INCONSISTENT &&
3810 device->state.role == R_PRIMARY &&
3811 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
d0180171 3812 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
b30ab791 3813 (unsigned long long)device->ed_uuid);
9f4fe9ad 3814 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3815 return -EIO;
b411b363
PR
3816 }
3817
b30ab791 3818 if (get_ldev(device)) {
b411b363 3819 int skip_initial_sync =
b30ab791 3820 device->state.conn == C_CONNECTED &&
9f4fe9ad 3821 peer_device->connection->agreed_pro_version >= 90 &&
b30ab791 3822 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
b411b363
PR
3823 (p_uuid[UI_FLAGS] & 8);
3824 if (skip_initial_sync) {
d0180171 3825 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
b30ab791 3826 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3827 "clear_n_write from receive_uuids",
3828 BM_LOCKED_TEST_ALLOWED);
b30ab791
AG
3829 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3830 _drbd_uuid_set(device, UI_BITMAP, 0);
3831 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
b411b363 3832 CS_VERBOSE, NULL);
b30ab791 3833 drbd_md_sync(device);
62b0da3a 3834 updated_uuids = 1;
b411b363 3835 }
b30ab791
AG
3836 put_ldev(device);
3837 } else if (device->state.disk < D_INCONSISTENT &&
3838 device->state.role == R_PRIMARY) {
18a50fa2
PR
3839 /* I am a diskless primary, the peer just created a new current UUID
3840 for me. */
b30ab791 3841 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
b411b363
PR
3842 }
3843
3844 /* Before we test for the disk state, we should wait until an eventually
3845 ongoing cluster wide state change is finished. That is important if
3846 we are primary and are detaching from our disk. We need to see the
3847 new disk state... */
b30ab791
AG
3848 mutex_lock(device->state_mutex);
3849 mutex_unlock(device->state_mutex);
3850 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3851 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
62b0da3a
LE
3852
3853 if (updated_uuids)
b30ab791 3854 drbd_print_uuids(device, "receiver updated UUIDs to");
b411b363 3855
82bc0194 3856 return 0;
b411b363
PR
3857}
3858
/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps: The state as seen by the peer.
 */
static union drbd_state convert_state(union drbd_state ps)
{
	union drbd_state ms;

	/* connection states whose names depend on which side you are on */
	static enum drbd_conns c_tab[] = {
		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
		[C_CONNECTED] = C_CONNECTED,

		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
		[C_VERIFY_S] = C_VERIFY_T,
		[C_MASK] = C_MASK,
	};

	ms.i = ps.i;

	ms.conn = c_tab[ps.conn];
	/* swap the symmetric role/peer and disk/pdsk pairs:
	 * the peer's "me" is our "peer" and vice versa */
	ms.peer = ps.role;
	ms.role = ps.peer;
	ms.pdsk = ps.disk;
	ms.disk = ps.pdsk;
	ms.peer_isp = (ps.aftr_isp | ps.user_isp);

	return ms;
}
3889
/*
 * Handle a per-device state change request from the peer: convert the
 * mask/val pair into our point of view and try to apply it, replying with
 * the resulting status code.
 *
 * Returns 0 on success, -EIO if the volume is unknown.
 */
static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* if we resolve conflicts and a local state change is in progress,
	 * reject the peer's concurrent request */
	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	drbd_md_sync(device);

	return 0;
}
3922
/*
 * Handle a connection-wide state change request from the peer — the
 * connection-level counterpart of receive_req_state().
 *
 * Returns 0.
 */
static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* reject a concurrent request while our own cstate change is pending */
	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}
3946
/*
 * Handle a P_STATE packet: merge the peer's reported state into our own,
 * deciding whether to start a resync (drbd_sync_handshake), finish one, or
 * disconnect.  The state is re-read and the merge retried (label "retry")
 * if it changed under us while we were not holding req_lock.
 *
 * Returns 0 on success, a negative error code on failure.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* derive the effective peer disk state from its UUID flags */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			/* handshake could not agree on a sync strategy */
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4124
/*
 * Handle a P_SYNC_UUID packet: once this device has reached a state in which
 * the new sync UUID may be applied, adopt it as current (without rotating
 * history), clear the bitmap UUID and start the resync as sync target.
 *
 * Returns 0 on success, -EIO if the volume is unknown.
 */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* wait until the device state allows applying the sync uuid */
	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}
4159
/**
 * receive_bitmap_plain - receive one chunk of an uncompressed bitmap transfer
 * @peer_device: peer device the bitmap belongs to
 * @size: payload size of this packet as announced by the peer
 * @p: receive buffer to read the bitmap words into
 * @c: transfer context tracking word/bit progress across packets
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	/* never read past the end of the bitmap */
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	/* the peer must send exactly the number of bytes we expect next */
	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
4196
/* low 4 bits of the encoding byte select the bitmap compression code */
static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}
4201
/* bit 7 of the encoding byte: initial toggle value of the RLE stream */
static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}
4206
/* bits 4-6 of the encoding byte: number of pad bits in the last byte */
static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}
4211
/**
 * recv_bm_rle_bits - decode a VLI run-length encoded bitmap chunk
 * @peer_device: peer device the bitmap belongs to
 * @p: compressed bitmap packet (code bytes follow the header)
 * @c: transfer context tracking bit/word progress across packets
 * @len: number of code bytes in @p
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;	/* current run length */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* whether the first run is "set" bits */
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the 64-bit look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* runs alternate between clear and set bits; only set runs are applied */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bitstream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
4280
2c46407d
AG
4281/**
4282 * decode_bitmap_c
4283 *
4284 * Return 0 when done, 1 when another iteration is needed, and a negative error
4285 * code upon failure.
4286 */
4287static int
69a22773 4288decode_bitmap_c(struct drbd_peer_device *peer_device,
b411b363 4289 struct p_compressed_bm *p,
c6d25cfe
PR
4290 struct bm_xfer_ctx *c,
4291 unsigned int len)
b411b363 4292{
a02d1240 4293 if (dcbp_get_code(p) == RLE_VLI_Bits)
69a22773 4294 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
b411b363
PR
4295
4296 /* other variants had been implemented for evaluation,
4297 * but have been dropped as this one turned out to be "best"
4298 * during all our tests. */
4299
69a22773
AG
4300 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4301 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
2c46407d 4302 return -EIO;
b411b363
PR
4303}
4304
b30ab791 4305void INFO_bm_xfer_stats(struct drbd_device *device,
b411b363
PR
4306 const char *direction, struct bm_xfer_ctx *c)
4307{
4308 /* what would it take to transfer it "plaintext" */
a6b32bc3 4309 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
50d0b1ad
AG
4310 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4311 unsigned int plain =
4312 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4313 c->bm_words * sizeof(unsigned long);
4314 unsigned int total = c->bytes[0] + c->bytes[1];
4315 unsigned int r;
b411b363
PR
4316
4317 /* total can not be zero. but just in case: */
4318 if (total == 0)
4319 return;
4320
4321 /* don't report if not compressed */
4322 if (total >= plain)
4323 return;
4324
4325 /* total < plain. check for overflow, still */
4326 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4327 : (1000 * total / plain);
4328
4329 if (r > 1000)
4330 r = 1000;
4331
4332 r = 1000 - r;
d0180171 4333 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
b411b363
PR
4334 "total %u; compression: %u.%u%%\n",
4335 direction,
4336 c->bytes[1], c->packets[1],
4337 c->bytes[0], c->packets[0],
4338 total, r/10, r % 10);
4339}
4340
4341/* Since we are processing the bitfield from lower addresses to higher,
4342 it does not matter if the process it in 32 bit chunks or 64 bit
4343 chunks as long as it is little endian. (Understand it as byte stream,
4344 beginning with the lowest byte...) If we would use big endian
4345 we would need to process it from the highest address to the lowest,
4346 in order to be agnostic to the 32 vs 64 bits issue.
4347
4348 returns 0 on failure, 1 if we successfully received it. */
bde89a9e 4349static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4350{
9f4fe9ad 4351 struct drbd_peer_device *peer_device;
b30ab791 4352 struct drbd_device *device;
b411b363 4353 struct bm_xfer_ctx c;
2c46407d 4354 int err;
4a76b161 4355
9f4fe9ad
AG
4356 peer_device = conn_peer_device(connection, pi->vnr);
4357 if (!peer_device)
4a76b161 4358 return -EIO;
9f4fe9ad 4359 device = peer_device->device;
b411b363 4360
b30ab791 4361 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
20ceb2b2
LE
4362 /* you are supposed to send additional out-of-sync information
4363 * if you actually set bits during this phase */
b411b363 4364
b411b363 4365 c = (struct bm_xfer_ctx) {
b30ab791
AG
4366 .bm_bits = drbd_bm_bits(device),
4367 .bm_words = drbd_bm_words(device),
b411b363
PR
4368 };
4369
2c46407d 4370 for(;;) {
e658983a 4371 if (pi->cmd == P_BITMAP)
69a22773 4372 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
e658983a 4373 else if (pi->cmd == P_COMPRESSED_BITMAP) {
b411b363
PR
4374 /* MAYBE: sanity check that we speak proto >= 90,
4375 * and the feature is enabled! */
e658983a 4376 struct p_compressed_bm *p = pi->data;
b411b363 4377
bde89a9e 4378 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
d0180171 4379 drbd_err(device, "ReportCBitmap packet too large\n");
82bc0194 4380 err = -EIO;
b411b363
PR
4381 goto out;
4382 }
e658983a 4383 if (pi->size <= sizeof(*p)) {
d0180171 4384 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
82bc0194 4385 err = -EIO;
78fcbdae 4386 goto out;
b411b363 4387 }
9f4fe9ad 4388 err = drbd_recv_all(peer_device->connection, p, pi->size);
e658983a
AG
4389 if (err)
4390 goto out;
69a22773 4391 err = decode_bitmap_c(peer_device, p, &c, pi->size);
b411b363 4392 } else {
d0180171 4393 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
82bc0194 4394 err = -EIO;
b411b363
PR
4395 goto out;
4396 }
4397
e2857216 4398 c.packets[pi->cmd == P_BITMAP]++;
bde89a9e 4399 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
b411b363 4400
2c46407d
AG
4401 if (err <= 0) {
4402 if (err < 0)
4403 goto out;
b411b363 4404 break;
2c46407d 4405 }
9f4fe9ad 4406 err = drbd_recv_header(peer_device->connection, pi);
82bc0194 4407 if (err)
b411b363 4408 goto out;
2c46407d 4409 }
b411b363 4410
b30ab791 4411 INFO_bm_xfer_stats(device, "receive", &c);
b411b363 4412
b30ab791 4413 if (device->state.conn == C_WF_BITMAP_T) {
de1f8e4a
AG
4414 enum drbd_state_rv rv;
4415
b30ab791 4416 err = drbd_send_bitmap(device);
82bc0194 4417 if (err)
b411b363
PR
4418 goto out;
4419 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
b30ab791 4420 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
0b0ba1ef 4421 D_ASSERT(device, rv == SS_SUCCESS);
b30ab791 4422 } else if (device->state.conn != C_WF_BITMAP_S) {
b411b363
PR
4423 /* admin may have requested C_DISCONNECTING,
4424 * other threads may have noticed network errors */
d0180171 4425 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
b30ab791 4426 drbd_conn_str(device->state.conn));
b411b363 4427 }
82bc0194 4428 err = 0;
b411b363 4429
b411b363 4430 out:
b30ab791
AG
4431 drbd_bm_unlock(device);
4432 if (!err && device->state.conn == C_WF_BITMAP_S)
4433 drbd_start_resync(device, C_SYNC_SOURCE);
82bc0194 4434 return err;
b411b363
PR
4435}
4436
bde89a9e 4437static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
b411b363 4438{
1ec861eb 4439 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
e2857216 4440 pi->cmd, pi->size);
b411b363 4441
bde89a9e 4442 return ignore_remaining_packet(connection, pi);
b411b363
PR
4443}
4444
bde89a9e 4445static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 4446{
e7f52dfb
LE
4447 /* Make sure we've acked all the TCP data associated
4448 * with the data requests being unplugged */
bde89a9e 4449 drbd_tcp_quickack(connection->data.socket);
0ced55a3 4450
82bc0194 4451 return 0;
0ced55a3
PR
4452}
4453
bde89a9e 4454static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
73a01a18 4455{
9f4fe9ad 4456 struct drbd_peer_device *peer_device;
b30ab791 4457 struct drbd_device *device;
e658983a 4458 struct p_block_desc *p = pi->data;
4a76b161 4459
9f4fe9ad
AG
4460 peer_device = conn_peer_device(connection, pi->vnr);
4461 if (!peer_device)
4a76b161 4462 return -EIO;
9f4fe9ad 4463 device = peer_device->device;
73a01a18 4464
b30ab791 4465 switch (device->state.conn) {
f735e363
LE
4466 case C_WF_SYNC_UUID:
4467 case C_WF_BITMAP_T:
4468 case C_BEHIND:
4469 break;
4470 default:
d0180171 4471 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
b30ab791 4472 drbd_conn_str(device->state.conn));
f735e363
LE
4473 }
4474
b30ab791 4475 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
73a01a18 4476
82bc0194 4477 return 0;
73a01a18
PR
4478}
4479
02918be2
PR
4480struct data_cmd {
4481 int expect_payload;
4482 size_t pkt_size;
bde89a9e 4483 int (*fn)(struct drbd_connection *, struct packet_info *);
02918be2
PR
4484};
4485
4486static struct data_cmd drbd_cmd_handler[] = {
4487 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4488 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4489 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4490 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4491 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4492 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4493 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
02918be2
PR
4494 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4495 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4496 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4497 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
02918be2
PR
4498 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4499 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4500 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4501 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4502 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4503 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4504 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4505 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4506 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4507 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
73a01a18 4508 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4a76b161 4509 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4510 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
a0fb3c47 4511 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
b411b363
PR
4512};
4513
bde89a9e 4514static void drbdd(struct drbd_connection *connection)
b411b363 4515{
77351055 4516 struct packet_info pi;
02918be2 4517 size_t shs; /* sub header size */
82bc0194 4518 int err;
b411b363 4519
bde89a9e 4520 while (get_t_state(&connection->receiver) == RUNNING) {
deebe195 4521 struct data_cmd *cmd;
b411b363 4522
bde89a9e
AG
4523 drbd_thread_current_set_cpu(&connection->receiver);
4524 if (drbd_recv_header(connection, &pi))
02918be2 4525 goto err_out;
b411b363 4526
deebe195 4527 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4528 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
1ec861eb 4529 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
2fcb8f30 4530 cmdname(pi.cmd), pi.cmd);
02918be2 4531 goto err_out;
0b33a916 4532 }
b411b363 4533
e658983a
AG
4534 shs = cmd->pkt_size;
4535 if (pi.size > shs && !cmd->expect_payload) {
1ec861eb 4536 drbd_err(connection, "No payload expected %s l:%d\n",
2fcb8f30 4537 cmdname(pi.cmd), pi.size);
02918be2 4538 goto err_out;
b411b363 4539 }
b411b363 4540
c13f7e1a 4541 if (shs) {
bde89a9e 4542 err = drbd_recv_all_warn(connection, pi.data, shs);
a5c31904 4543 if (err)
c13f7e1a 4544 goto err_out;
e2857216 4545 pi.size -= shs;
c13f7e1a
LE
4546 }
4547
bde89a9e 4548 err = cmd->fn(connection, &pi);
4a76b161 4549 if (err) {
1ec861eb 4550 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
9f5bdc33 4551 cmdname(pi.cmd), err, pi.size);
02918be2 4552 goto err_out;
b411b363
PR
4553 }
4554 }
82bc0194 4555 return;
b411b363 4556
82bc0194 4557 err_out:
bde89a9e 4558 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4559}
4560
bde89a9e 4561static void conn_disconnect(struct drbd_connection *connection)
b411b363 4562{
c06ece6b 4563 struct drbd_peer_device *peer_device;
bbeb641c 4564 enum drbd_conns oc;
376694a0 4565 int vnr;
b411b363 4566
bde89a9e 4567 if (connection->cstate == C_STANDALONE)
b411b363 4568 return;
b411b363 4569
545752d5
LE
4570 /* We are about to start the cleanup after connection loss.
4571 * Make sure drbd_make_request knows about that.
4572 * Usually we should be in some network failure state already,
4573 * but just in case we are not, we fix it up here.
4574 */
bde89a9e 4575 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
545752d5 4576
b411b363 4577 /* asender does not clean up anything. it must not interfere, either */
bde89a9e
AG
4578 drbd_thread_stop(&connection->asender);
4579 drbd_free_sock(connection);
360cc740 4580
c141ebda 4581 rcu_read_lock();
c06ece6b
AG
4582 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4583 struct drbd_device *device = peer_device->device;
b30ab791 4584 kref_get(&device->kref);
c141ebda 4585 rcu_read_unlock();
69a22773 4586 drbd_disconnected(peer_device);
c06ece6b 4587 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
4588 rcu_read_lock();
4589 }
4590 rcu_read_unlock();
4591
bde89a9e 4592 if (!list_empty(&connection->current_epoch->list))
1ec861eb 4593 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
12038a3a 4594 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
bde89a9e
AG
4595 atomic_set(&connection->current_epoch->epoch_size, 0);
4596 connection->send.seen_any_write_yet = false;
12038a3a 4597
1ec861eb 4598 drbd_info(connection, "Connection closed\n");
360cc740 4599
bde89a9e
AG
4600 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4601 conn_try_outdate_peer_async(connection);
cb703454 4602
0500813f 4603 spin_lock_irq(&connection->resource->req_lock);
bde89a9e 4604 oc = connection->cstate;
bbeb641c 4605 if (oc >= C_UNCONNECTED)
bde89a9e 4606 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4607
0500813f 4608 spin_unlock_irq(&connection->resource->req_lock);
360cc740 4609
f3dfa40a 4610 if (oc == C_DISCONNECTING)
bde89a9e 4611 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4612}
4613
69a22773 4614static int drbd_disconnected(struct drbd_peer_device *peer_device)
360cc740 4615{
69a22773 4616 struct drbd_device *device = peer_device->device;
360cc740 4617 unsigned int i;
b411b363 4618
85719573 4619 /* wait for current activity to cease. */
0500813f 4620 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
4621 _drbd_wait_ee_list_empty(device, &device->active_ee);
4622 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4623 _drbd_wait_ee_list_empty(device, &device->read_ee);
0500813f 4624 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4625
4626 /* We do not have data structures that would allow us to
4627 * get the rs_pending_cnt down to 0 again.
4628 * * On C_SYNC_TARGET we do not have any data structures describing
4629 * the pending RSDataRequest's we have sent.
4630 * * On C_SYNC_SOURCE there is no data structure that tracks
4631 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4632 * And no, it is not the sum of the reference counts in the
4633 * resync_LRU. The resync_LRU tracks the whole operation including
4634 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4635 * on the fly. */
b30ab791
AG
4636 drbd_rs_cancel_all(device);
4637 device->rs_total = 0;
4638 device->rs_failed = 0;
4639 atomic_set(&device->rs_pending_cnt, 0);
4640 wake_up(&device->misc_wait);
b411b363 4641
b30ab791
AG
4642 del_timer_sync(&device->resync_timer);
4643 resync_timer_fn((unsigned long)device);
b411b363 4644
b411b363
PR
4645 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4646 * w_make_resync_request etc. which may still be on the worker queue
4647 * to be "canceled" */
b5043c5e 4648 drbd_flush_workqueue(&peer_device->connection->sender_work);
b411b363 4649
b30ab791 4650 drbd_finish_peer_reqs(device);
b411b363 4651
d10b4ea3
PR
4652 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4653 might have issued a work again. The one before drbd_finish_peer_reqs() is
4654 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
b5043c5e 4655 drbd_flush_workqueue(&peer_device->connection->sender_work);
d10b4ea3 4656
08332d73
LE
4657 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4658 * again via drbd_try_clear_on_disk_bm(). */
b30ab791 4659 drbd_rs_cancel_all(device);
b411b363 4660
b30ab791
AG
4661 kfree(device->p_uuid);
4662 device->p_uuid = NULL;
b411b363 4663
b30ab791 4664 if (!drbd_suspended(device))
69a22773 4665 tl_clear(peer_device->connection);
b411b363 4666
b30ab791 4667 drbd_md_sync(device);
b411b363 4668
20ceb2b2
LE
4669 /* serialize with bitmap writeout triggered by the state change,
4670 * if any. */
b30ab791 4671 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
20ceb2b2 4672
b411b363
PR
4673 /* tcp_close and release of sendpage pages can be deferred. I don't
4674 * want to use SO_LINGER, because apparently it can be deferred for
4675 * more than 20 seconds (longest time I checked).
4676 *
4677 * Actually we don't care for exactly when the network stack does its
4678 * put_page(), but release our reference on these pages right here.
4679 */
b30ab791 4680 i = drbd_free_peer_reqs(device, &device->net_ee);
b411b363 4681 if (i)
d0180171 4682 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
b30ab791 4683 i = atomic_read(&device->pp_in_use_by_net);
435f0740 4684 if (i)
d0180171 4685 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
b30ab791 4686 i = atomic_read(&device->pp_in_use);
b411b363 4687 if (i)
d0180171 4688 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
b411b363 4689
0b0ba1ef
AG
4690 D_ASSERT(device, list_empty(&device->read_ee));
4691 D_ASSERT(device, list_empty(&device->active_ee));
4692 D_ASSERT(device, list_empty(&device->sync_ee));
4693 D_ASSERT(device, list_empty(&device->done_ee));
b411b363 4694
360cc740 4695 return 0;
b411b363
PR
4696}
4697
4698/*
4699 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4700 * we can agree on is stored in agreed_pro_version.
4701 *
4702 * feature flags and the reserved array should be enough room for future
4703 * enhancements of the handshake protocol, and possible plugins...
4704 *
4705 * for now, they are expected to be zero, but ignored.
4706 */
bde89a9e 4707static int drbd_send_features(struct drbd_connection *connection)
b411b363 4708{
9f5bdc33
AG
4709 struct drbd_socket *sock;
4710 struct p_connection_features *p;
b411b363 4711
bde89a9e
AG
4712 sock = &connection->data;
4713 p = conn_prepare_command(connection, sock);
9f5bdc33 4714 if (!p)
e8d17b01 4715 return -EIO;
b411b363
PR
4716 memset(p, 0, sizeof(*p));
4717 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4718 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
20c68fde 4719 p->feature_flags = cpu_to_be32(PRO_FEATURES);
bde89a9e 4720 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4721}
4722
4723/*
4724 * return values:
4725 * 1 yes, we have a valid connection
4726 * 0 oops, did not work out, please try again
4727 * -1 peer talks different language,
4728 * no point in trying again, please go standalone.
4729 */
bde89a9e 4730static int drbd_do_features(struct drbd_connection *connection)
b411b363 4731{
bde89a9e 4732 /* ASSERT current == connection->receiver ... */
e658983a
AG
4733 struct p_connection_features *p;
4734 const int expect = sizeof(struct p_connection_features);
77351055 4735 struct packet_info pi;
a5c31904 4736 int err;
b411b363 4737
bde89a9e 4738 err = drbd_send_features(connection);
e8d17b01 4739 if (err)
b411b363
PR
4740 return 0;
4741
bde89a9e 4742 err = drbd_recv_header(connection, &pi);
69bc7bc3 4743 if (err)
b411b363
PR
4744 return 0;
4745
6038178e 4746 if (pi.cmd != P_CONNECTION_FEATURES) {
1ec861eb 4747 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4748 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4749 return -1;
4750 }
4751
77351055 4752 if (pi.size != expect) {
1ec861eb 4753 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4754 expect, pi.size);
b411b363
PR
4755 return -1;
4756 }
4757
e658983a 4758 p = pi.data;
bde89a9e 4759 err = drbd_recv_all_warn(connection, p, expect);
a5c31904 4760 if (err)
b411b363 4761 return 0;
b411b363 4762
b411b363
PR
4763 p->protocol_min = be32_to_cpu(p->protocol_min);
4764 p->protocol_max = be32_to_cpu(p->protocol_max);
4765 if (p->protocol_max == 0)
4766 p->protocol_max = p->protocol_min;
4767
4768 if (PRO_VERSION_MAX < p->protocol_min ||
4769 PRO_VERSION_MIN > p->protocol_max)
4770 goto incompat;
4771
bde89a9e 4772 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
20c68fde 4773 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
b411b363 4774
1ec861eb 4775 drbd_info(connection, "Handshake successful: "
bde89a9e 4776 "Agreed network protocol version %d\n", connection->agreed_pro_version);
b411b363 4777
20c68fde
LE
4778 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4779 connection->agreed_features & FF_TRIM ? " " : " not ");
4780
b411b363
PR
4781 return 1;
4782
4783 incompat:
1ec861eb 4784 drbd_err(connection, "incompatible DRBD dialects: "
b411b363
PR
4785 "I support %d-%d, peer supports %d-%d\n",
4786 PRO_VERSION_MIN, PRO_VERSION_MAX,
4787 p->protocol_min, p->protocol_max);
4788 return -1;
4789}
4790
4791#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
bde89a9e 4792static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4793{
1ec861eb
AG
4794 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4795 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4796 return -1;
b411b363
PR
4797}
4798#else
4799#define CHALLENGE_LEN 64
b10d96cb
JT
4800
4801/* Return value:
4802 1 - auth succeeded,
4803 0 - failed, try again (network error),
4804 -1 - auth failed, don't try again.
4805*/
4806
bde89a9e 4807static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4808{
9f5bdc33 4809 struct drbd_socket *sock;
b411b363
PR
4810 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4811 struct scatterlist sg;
4812 char *response = NULL;
4813 char *right_response = NULL;
4814 char *peers_ch = NULL;
44ed167d
PR
4815 unsigned int key_len;
4816 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4817 unsigned int resp_size;
4818 struct hash_desc desc;
77351055 4819 struct packet_info pi;
44ed167d 4820 struct net_conf *nc;
69bc7bc3 4821 int err, rv;
b411b363 4822
9f5bdc33 4823 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
b411b363 4824
44ed167d 4825 rcu_read_lock();
bde89a9e 4826 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
4827 key_len = strlen(nc->shared_secret);
4828 memcpy(secret, nc->shared_secret, key_len);
4829 rcu_read_unlock();
4830
bde89a9e 4831 desc.tfm = connection->cram_hmac_tfm;
b411b363
PR
4832 desc.flags = 0;
4833
bde89a9e 4834 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4835 if (rv) {
1ec861eb 4836 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4837 rv = -1;
b411b363
PR
4838 goto fail;
4839 }
4840
4841 get_random_bytes(my_challenge, CHALLENGE_LEN);
4842
bde89a9e
AG
4843 sock = &connection->data;
4844 if (!conn_prepare_command(connection, sock)) {
9f5bdc33
AG
4845 rv = 0;
4846 goto fail;
4847 }
bde89a9e 4848 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4849 my_challenge, CHALLENGE_LEN);
b411b363
PR
4850 if (!rv)
4851 goto fail;
4852
bde89a9e 4853 err = drbd_recv_header(connection, &pi);
69bc7bc3
AG
4854 if (err) {
4855 rv = 0;
b411b363 4856 goto fail;
69bc7bc3 4857 }
b411b363 4858
77351055 4859 if (pi.cmd != P_AUTH_CHALLENGE) {
1ec861eb 4860 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4861 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4862 rv = 0;
4863 goto fail;
4864 }
4865
77351055 4866 if (pi.size > CHALLENGE_LEN * 2) {
1ec861eb 4867 drbd_err(connection, "expected AuthChallenge payload too big.\n");
b10d96cb 4868 rv = -1;
b411b363
PR
4869 goto fail;
4870 }
4871
67cca286
PR
4872 if (pi.size < CHALLENGE_LEN) {
4873 drbd_err(connection, "AuthChallenge payload too small.\n");
4874 rv = -1;
4875 goto fail;
4876 }
4877
77351055 4878 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4879 if (peers_ch == NULL) {
1ec861eb 4880 drbd_err(connection, "kmalloc of peers_ch failed\n");
b10d96cb 4881 rv = -1;
b411b363
PR
4882 goto fail;
4883 }
4884
bde89a9e 4885 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
a5c31904 4886 if (err) {
b411b363
PR
4887 rv = 0;
4888 goto fail;
4889 }
4890
67cca286
PR
4891 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4892 drbd_err(connection, "Peer presented the same challenge!\n");
4893 rv = -1;
4894 goto fail;
4895 }
4896
bde89a9e 4897 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
b411b363
PR
4898 response = kmalloc(resp_size, GFP_NOIO);
4899 if (response == NULL) {
1ec861eb 4900 drbd_err(connection, "kmalloc of response failed\n");
b10d96cb 4901 rv = -1;
b411b363
PR
4902 goto fail;
4903 }
4904
4905 sg_init_table(&sg, 1);
77351055 4906 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4907
4908 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4909 if (rv) {
1ec861eb 4910 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4911 rv = -1;
b411b363
PR
4912 goto fail;
4913 }
4914
bde89a9e 4915 if (!conn_prepare_command(connection, sock)) {
9f5bdc33 4916 rv = 0;
b411b363 4917 goto fail;
9f5bdc33 4918 }
bde89a9e 4919 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4920 response, resp_size);
b411b363
PR
4921 if (!rv)
4922 goto fail;
4923
bde89a9e 4924 err = drbd_recv_header(connection, &pi);
69bc7bc3 4925 if (err) {
b411b363
PR
4926 rv = 0;
4927 goto fail;
4928 }
4929
77351055 4930 if (pi.cmd != P_AUTH_RESPONSE) {
1ec861eb 4931 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4932 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4933 rv = 0;
4934 goto fail;
4935 }
4936
77351055 4937 if (pi.size != resp_size) {
1ec861eb 4938 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
b411b363
PR
4939 rv = 0;
4940 goto fail;
4941 }
b411b363 4942
bde89a9e 4943 err = drbd_recv_all_warn(connection, response , resp_size);
a5c31904 4944 if (err) {
b411b363
PR
4945 rv = 0;
4946 goto fail;
4947 }
4948
4949 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4950 if (right_response == NULL) {
1ec861eb 4951 drbd_err(connection, "kmalloc of right_response failed\n");
b10d96cb 4952 rv = -1;
b411b363
PR
4953 goto fail;
4954 }
4955
4956 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4957
4958 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4959 if (rv) {
1ec861eb 4960 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4961 rv = -1;
b411b363
PR
4962 goto fail;
4963 }
4964
4965 rv = !memcmp(response, right_response, resp_size);
4966
4967 if (rv)
1ec861eb 4968 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
44ed167d 4969 resp_size);
b10d96cb
JT
4970 else
4971 rv = -1;
b411b363
PR
4972
4973 fail:
4974 kfree(peers_ch);
4975 kfree(response);
4976 kfree(right_response);
4977
4978 return rv;
4979}
4980#endif
4981
8fe60551 4982int drbd_receiver(struct drbd_thread *thi)
b411b363 4983{
bde89a9e 4984 struct drbd_connection *connection = thi->connection;
b411b363
PR
4985 int h;
4986
1ec861eb 4987 drbd_info(connection, "receiver (re)started\n");
b411b363
PR
4988
4989 do {
bde89a9e 4990 h = conn_connect(connection);
b411b363 4991 if (h == 0) {
bde89a9e 4992 conn_disconnect(connection);
20ee6390 4993 schedule_timeout_interruptible(HZ);
b411b363
PR
4994 }
4995 if (h == -1) {
1ec861eb 4996 drbd_warn(connection, "Discarding network configuration.\n");
bde89a9e 4997 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4998 }
4999 } while (h == 0);
5000
91fd4dad 5001 if (h > 0)
bde89a9e 5002 drbdd(connection);
b411b363 5003
bde89a9e 5004 conn_disconnect(connection);
b411b363 5005
1ec861eb 5006 drbd_info(connection, "receiver terminated\n");
b411b363
PR
5007 return 0;
5008}
5009
5010/* ********* acknowledge sender ******** */
5011
bde89a9e 5012static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5013{
e658983a 5014 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
5015 int retcode = be32_to_cpu(p->retcode);
5016
5017 if (retcode >= SS_SUCCESS) {
bde89a9e 5018 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
e4f78ede 5019 } else {
bde89a9e 5020 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
1ec861eb 5021 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
e4f78ede
PR
5022 drbd_set_st_err_str(retcode), retcode);
5023 }
bde89a9e 5024 wake_up(&connection->ping_wait);
e4f78ede 5025
2735a594 5026 return 0;
e4f78ede 5027}
b411b363 5028
bde89a9e 5029static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5030{
9f4fe9ad 5031 struct drbd_peer_device *peer_device;
b30ab791 5032 struct drbd_device *device;
e658983a 5033 struct p_req_state_reply *p = pi->data;
b411b363
PR
5034 int retcode = be32_to_cpu(p->retcode);
5035
9f4fe9ad
AG
5036 peer_device = conn_peer_device(connection, pi->vnr);
5037 if (!peer_device)
2735a594 5038 return -EIO;
9f4fe9ad 5039 device = peer_device->device;
1952e916 5040
bde89a9e 5041 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
0b0ba1ef 5042 D_ASSERT(device, connection->agreed_pro_version < 100);
bde89a9e 5043 return got_conn_RqSReply(connection, pi);
4d0fc3fd
PR
5044 }
5045
b411b363 5046 if (retcode >= SS_SUCCESS) {
b30ab791 5047 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
b411b363 5048 } else {
b30ab791 5049 set_bit(CL_ST_CHG_FAIL, &device->flags);
d0180171 5050 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
e4f78ede 5051 drbd_set_st_err_str(retcode), retcode);
b411b363 5052 }
b30ab791 5053 wake_up(&device->state_wait);
b411b363 5054
2735a594 5055 return 0;
b411b363
PR
5056}
5057
bde89a9e 5058static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5059{
bde89a9e 5060 return drbd_send_ping_ack(connection);
b411b363
PR
5061
5062}
5063
bde89a9e 5064static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363
PR
5065{
5066 /* restore idle timeout */
bde89a9e
AG
5067 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5068 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5069 wake_up(&connection->ping_wait);
b411b363 5070
2735a594 5071 return 0;
b411b363
PR
5072}
5073
bde89a9e 5074static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5075{
9f4fe9ad 5076 struct drbd_peer_device *peer_device;
b30ab791 5077 struct drbd_device *device;
e658983a 5078 struct p_block_ack *p = pi->data;
b411b363
PR
5079 sector_t sector = be64_to_cpu(p->sector);
5080 int blksize = be32_to_cpu(p->blksize);
5081
9f4fe9ad
AG
5082 peer_device = conn_peer_device(connection, pi->vnr);
5083 if (!peer_device)
2735a594 5084 return -EIO;
9f4fe9ad 5085 device = peer_device->device;
1952e916 5086
9f4fe9ad 5087 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
b411b363 5088
69a22773 5089 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5090
b30ab791
AG
5091 if (get_ldev(device)) {
5092 drbd_rs_complete_io(device, sector);
5093 drbd_set_in_sync(device, sector, blksize);
1d53f09e 5094 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
b30ab791
AG
5095 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5096 put_ldev(device);
1d53f09e 5097 }
b30ab791
AG
5098 dec_rs_pending(device);
5099 atomic_add(blksize >> 9, &device->rs_sect_in);
b411b363 5100
2735a594 5101 return 0;
b411b363
PR
5102}
5103
bc9c5c41 5104static int
b30ab791 5105validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
bc9c5c41
AG
5106 struct rb_root *root, const char *func,
5107 enum drbd_req_event what, bool missing_ok)
b411b363
PR
5108{
5109 struct drbd_request *req;
5110 struct bio_and_error m;
5111
0500813f 5112 spin_lock_irq(&device->resource->req_lock);
b30ab791 5113 req = find_request(device, root, id, sector, missing_ok, func);
b411b363 5114 if (unlikely(!req)) {
0500813f 5115 spin_unlock_irq(&device->resource->req_lock);
85997675 5116 return -EIO;
b411b363
PR
5117 }
5118 __req_mod(req, what, &m);
0500813f 5119 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
5120
5121 if (m.bio)
b30ab791 5122 complete_master_bio(device, &m);
85997675 5123 return 0;
b411b363
PR
5124}
5125
bde89a9e 5126static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5127{
9f4fe9ad 5128 struct drbd_peer_device *peer_device;
b30ab791 5129 struct drbd_device *device;
e658983a 5130 struct p_block_ack *p = pi->data;
b411b363
PR
5131 sector_t sector = be64_to_cpu(p->sector);
5132 int blksize = be32_to_cpu(p->blksize);
5133 enum drbd_req_event what;
5134
9f4fe9ad
AG
5135 peer_device = conn_peer_device(connection, pi->vnr);
5136 if (!peer_device)
2735a594 5137 return -EIO;
9f4fe9ad 5138 device = peer_device->device;
1952e916 5139
69a22773 5140 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5141
579b57ed 5142 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5143 drbd_set_in_sync(device, sector, blksize);
5144 dec_rs_pending(device);
2735a594 5145 return 0;
b411b363 5146 }
e05e1e59 5147 switch (pi->cmd) {
b411b363 5148 case P_RS_WRITE_ACK:
8554df1c 5149 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
5150 break;
5151 case P_WRITE_ACK:
8554df1c 5152 what = WRITE_ACKED_BY_PEER;
b411b363
PR
5153 break;
5154 case P_RECV_ACK:
8554df1c 5155 what = RECV_ACKED_BY_PEER;
b411b363 5156 break;
d4dabbe2
LE
5157 case P_SUPERSEDED:
5158 what = CONFLICT_RESOLVED;
b411b363 5159 break;
7be8da07 5160 case P_RETRY_WRITE:
7be8da07 5161 what = POSTPONE_WRITE;
b411b363
PR
5162 break;
5163 default:
2735a594 5164 BUG();
b411b363
PR
5165 }
5166
b30ab791
AG
5167 return validate_req_change_req_state(device, p->block_id, sector,
5168 &device->write_requests, __func__,
2735a594 5169 what, false);
b411b363
PR
5170}
5171
bde89a9e 5172static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5173{
9f4fe9ad 5174 struct drbd_peer_device *peer_device;
b30ab791 5175 struct drbd_device *device;
e658983a 5176 struct p_block_ack *p = pi->data;
b411b363 5177 sector_t sector = be64_to_cpu(p->sector);
2deb8336 5178 int size = be32_to_cpu(p->blksize);
85997675 5179 int err;
b411b363 5180
9f4fe9ad
AG
5181 peer_device = conn_peer_device(connection, pi->vnr);
5182 if (!peer_device)
2735a594 5183 return -EIO;
9f4fe9ad 5184 device = peer_device->device;
b411b363 5185
69a22773 5186 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5187
579b57ed 5188 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5189 dec_rs_pending(device);
5190 drbd_rs_failed_io(device, sector, size);
2735a594 5191 return 0;
b411b363 5192 }
2deb8336 5193
b30ab791
AG
5194 err = validate_req_change_req_state(device, p->block_id, sector,
5195 &device->write_requests, __func__,
303d1448 5196 NEG_ACKED, true);
85997675 5197 if (err) {
c3afd8f5
AG
5198 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5199 The master bio might already be completed, therefore the
5200 request is no longer in the collision hash. */
5201 /* In Protocol B we might already have got a P_RECV_ACK
5202 but then get a P_NEG_ACK afterwards. */
b30ab791 5203 drbd_set_out_of_sync(device, sector, size);
2deb8336 5204 }
2735a594 5205 return 0;
b411b363
PR
5206}
5207
bde89a9e 5208static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5209{
9f4fe9ad 5210 struct drbd_peer_device *peer_device;
b30ab791 5211 struct drbd_device *device;
e658983a 5212 struct p_block_ack *p = pi->data;
b411b363
PR
5213 sector_t sector = be64_to_cpu(p->sector);
5214
9f4fe9ad
AG
5215 peer_device = conn_peer_device(connection, pi->vnr);
5216 if (!peer_device)
2735a594 5217 return -EIO;
9f4fe9ad 5218 device = peer_device->device;
1952e916 5219
69a22773 5220 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
7be8da07 5221
d0180171 5222 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5223 (unsigned long long)sector, be32_to_cpu(p->blksize));
5224
b30ab791
AG
5225 return validate_req_change_req_state(device, p->block_id, sector,
5226 &device->read_requests, __func__,
2735a594 5227 NEG_ACKED, false);
b411b363
PR
5228}
5229
bde89a9e 5230static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5231{
9f4fe9ad 5232 struct drbd_peer_device *peer_device;
b30ab791 5233 struct drbd_device *device;
b411b363
PR
5234 sector_t sector;
5235 int size;
e658983a 5236 struct p_block_ack *p = pi->data;
1952e916 5237
9f4fe9ad
AG
5238 peer_device = conn_peer_device(connection, pi->vnr);
5239 if (!peer_device)
2735a594 5240 return -EIO;
9f4fe9ad 5241 device = peer_device->device;
b411b363
PR
5242
5243 sector = be64_to_cpu(p->sector);
5244 size = be32_to_cpu(p->blksize);
b411b363 5245
69a22773 5246 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5247
b30ab791 5248 dec_rs_pending(device);
b411b363 5249
b30ab791
AG
5250 if (get_ldev_if_state(device, D_FAILED)) {
5251 drbd_rs_complete_io(device, sector);
e05e1e59 5252 switch (pi->cmd) {
d612d309 5253 case P_NEG_RS_DREPLY:
b30ab791 5254 drbd_rs_failed_io(device, sector, size);
d612d309
PR
5255 case P_RS_CANCEL:
5256 break;
5257 default:
2735a594 5258 BUG();
d612d309 5259 }
b30ab791 5260 put_ldev(device);
b411b363
PR
5261 }
5262
2735a594 5263 return 0;
b411b363
PR
5264}
5265
bde89a9e 5266static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5267{
e658983a 5268 struct p_barrier_ack *p = pi->data;
c06ece6b 5269 struct drbd_peer_device *peer_device;
9ed57dcb 5270 int vnr;
1952e916 5271
bde89a9e 5272 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
b411b363 5273
9ed57dcb 5274 rcu_read_lock();
c06ece6b
AG
5275 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5276 struct drbd_device *device = peer_device->device;
5277
b30ab791
AG
5278 if (device->state.conn == C_AHEAD &&
5279 atomic_read(&device->ap_in_flight) == 0 &&
5280 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5281 device->start_resync_timer.expires = jiffies + HZ;
5282 add_timer(&device->start_resync_timer);
9ed57dcb 5283 }
c4752ef1 5284 }
9ed57dcb 5285 rcu_read_unlock();
c4752ef1 5286
2735a594 5287 return 0;
b411b363
PR
5288}
5289
bde89a9e 5290static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5291{
9f4fe9ad 5292 struct drbd_peer_device *peer_device;
b30ab791 5293 struct drbd_device *device;
e658983a 5294 struct p_block_ack *p = pi->data;
84b8c06b 5295 struct drbd_device_work *dw;
b411b363
PR
5296 sector_t sector;
5297 int size;
5298
9f4fe9ad
AG
5299 peer_device = conn_peer_device(connection, pi->vnr);
5300 if (!peer_device)
2735a594 5301 return -EIO;
9f4fe9ad 5302 device = peer_device->device;
1952e916 5303
b411b363
PR
5304 sector = be64_to_cpu(p->sector);
5305 size = be32_to_cpu(p->blksize);
5306
69a22773 5307 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363
PR
5308
5309 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
b30ab791 5310 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 5311 else
b30ab791 5312 ov_out_of_sync_print(device);
b411b363 5313
b30ab791 5314 if (!get_ldev(device))
2735a594 5315 return 0;
1d53f09e 5316
b30ab791
AG
5317 drbd_rs_complete_io(device, sector);
5318 dec_rs_pending(device);
b411b363 5319
b30ab791 5320 --device->ov_left;
ea5442af
LE
5321
5322 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
5323 if ((device->ov_left & 0x200) == 0x200)
5324 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 5325
b30ab791 5326 if (device->ov_left == 0) {
84b8c06b
AG
5327 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5328 if (dw) {
5329 dw->w.cb = w_ov_finished;
5330 dw->device = device;
5331 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
b411b363 5332 } else {
84b8c06b 5333 drbd_err(device, "kmalloc(dw) failed.");
b30ab791
AG
5334 ov_out_of_sync_print(device);
5335 drbd_resync_finished(device);
b411b363
PR
5336 }
5337 }
b30ab791 5338 put_ldev(device);
2735a594 5339 return 0;
b411b363
PR
5340}
5341
bde89a9e 5342static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 5343{
2735a594 5344 return 0;
b411b363
PR
5345}
5346
bde89a9e 5347static int connection_finish_peer_reqs(struct drbd_connection *connection)
0ced55a3 5348{
c06ece6b 5349 struct drbd_peer_device *peer_device;
c141ebda 5350 int vnr, not_empty = 0;
32862ec7
PR
5351
5352 do {
bde89a9e 5353 clear_bit(SIGNAL_ASENDER, &connection->flags);
32862ec7 5354 flush_signals(current);
c141ebda
PR
5355
5356 rcu_read_lock();
c06ece6b
AG
5357 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5358 struct drbd_device *device = peer_device->device;
b30ab791 5359 kref_get(&device->kref);
c141ebda 5360 rcu_read_unlock();
b30ab791 5361 if (drbd_finish_peer_reqs(device)) {
05a10ec7 5362 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5363 return 1;
d3fcb490 5364 }
05a10ec7 5365 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5366 rcu_read_lock();
082a3439 5367 }
bde89a9e 5368 set_bit(SIGNAL_ASENDER, &connection->flags);
082a3439 5369
0500813f 5370 spin_lock_irq(&connection->resource->req_lock);
c06ece6b
AG
5371 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5372 struct drbd_device *device = peer_device->device;
b30ab791 5373 not_empty = !list_empty(&device->done_ee);
082a3439
PR
5374 if (not_empty)
5375 break;
5376 }
0500813f 5377 spin_unlock_irq(&connection->resource->req_lock);
c141ebda 5378 rcu_read_unlock();
32862ec7
PR
5379 } while (not_empty);
5380
5381 return 0;
0ced55a3
PR
5382}
5383
b411b363
PR
/* Dispatch table entry for packets received on the meta (asender) socket. */
struct asender_cmd {
	size_t pkt_size;	/* expected payload size following the header */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5388
7201b972 5389static struct asender_cmd asender_tbl[] = {
e658983a
AG
5390 [P_PING] = { 0, got_Ping },
5391 [P_PING_ACK] = { 0, got_PingAck },
b411b363
PR
5392 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5393 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5394 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
d4dabbe2 5395 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
b411b363
PR
5396 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5397 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
1952e916 5398 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
b411b363
PR
5399 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5400 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5401 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5402 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
02918be2 5403 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
1952e916
AG
5404 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5405 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5406 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972 5407};
b411b363
PR
5408
5409int drbd_asender(struct drbd_thread *thi)
5410{
bde89a9e 5411 struct drbd_connection *connection = thi->connection;
b411b363 5412 struct asender_cmd *cmd = NULL;
77351055 5413 struct packet_info pi;
257d0af6 5414 int rv;
bde89a9e 5415 void *buf = connection->meta.rbuf;
b411b363 5416 int received = 0;
bde89a9e 5417 unsigned int header_size = drbd_header_size(connection);
52b061a4 5418 int expect = header_size;
44ed167d
PR
5419 bool ping_timeout_active = false;
5420 struct net_conf *nc;
bb77d34e 5421 int ping_timeo, tcp_cork, ping_int;
3990e04d 5422 struct sched_param param = { .sched_priority = 2 };
b411b363 5423
3990e04d
PR
5424 rv = sched_setscheduler(current, SCHED_RR, &param);
5425 if (rv < 0)
1ec861eb 5426 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
b411b363 5427
e77a0a5c 5428 while (get_t_state(thi) == RUNNING) {
80822284 5429 drbd_thread_current_set_cpu(thi);
b411b363 5430
44ed167d 5431 rcu_read_lock();
bde89a9e 5432 nc = rcu_dereference(connection->net_conf);
44ed167d 5433 ping_timeo = nc->ping_timeo;
bb77d34e 5434 tcp_cork = nc->tcp_cork;
44ed167d
PR
5435 ping_int = nc->ping_int;
5436 rcu_read_unlock();
5437
bde89a9e
AG
5438 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5439 if (drbd_send_ping(connection)) {
1ec861eb 5440 drbd_err(connection, "drbd_send_ping has failed\n");
b411b363 5441 goto reconnect;
841ce241 5442 }
bde89a9e 5443 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
44ed167d 5444 ping_timeout_active = true;
b411b363
PR
5445 }
5446
32862ec7
PR
5447 /* TODO: conditionally cork; it may hurt latency if we cork without
5448 much to send */
bb77d34e 5449 if (tcp_cork)
bde89a9e
AG
5450 drbd_tcp_cork(connection->meta.socket);
5451 if (connection_finish_peer_reqs(connection)) {
1ec861eb 5452 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
32862ec7 5453 goto reconnect;
b411b363
PR
5454 }
5455 /* but unconditionally uncork unless disabled */
bb77d34e 5456 if (tcp_cork)
bde89a9e 5457 drbd_tcp_uncork(connection->meta.socket);
b411b363
PR
5458
5459 /* short circuit, recv_msg would return EINTR anyways. */
5460 if (signal_pending(current))
5461 continue;
5462
bde89a9e
AG
5463 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5464 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363
PR
5465
5466 flush_signals(current);
5467
5468 /* Note:
5469 * -EINTR (on meta) we got a signal
5470 * -EAGAIN (on meta) rcvtimeo expired
5471 * -ECONNRESET other side closed the connection
5472 * -ERESTARTSYS (on data) we got a signal
5473 * rv < 0 other than above: unexpected error!
5474 * rv == expected: full header or command
5475 * rv < expected: "woken" by signal during receive
5476 * rv == 0 : "connection shut down by peer"
5477 */
5478 if (likely(rv > 0)) {
5479 received += rv;
5480 buf += rv;
5481 } else if (rv == 0) {
bde89a9e 5482 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
5483 long t;
5484 rcu_read_lock();
bde89a9e 5485 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
5486 rcu_read_unlock();
5487
bde89a9e
AG
5488 t = wait_event_timeout(connection->ping_wait,
5489 connection->cstate < C_WF_REPORT_PARAMS,
b66623e3 5490 t);
599377ac
PR
5491 if (t)
5492 break;
5493 }
1ec861eb 5494 drbd_err(connection, "meta connection shut down by peer.\n");
b411b363
PR
5495 goto reconnect;
5496 } else if (rv == -EAGAIN) {
cb6518cb
LE
5497 /* If the data socket received something meanwhile,
5498 * that is good enough: peer is still alive. */
bde89a9e
AG
5499 if (time_after(connection->last_received,
5500 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5501 continue;
f36af18c 5502 if (ping_timeout_active) {
1ec861eb 5503 drbd_err(connection, "PingAck did not arrive in time.\n");
b411b363
PR
5504 goto reconnect;
5505 }
bde89a9e 5506 set_bit(SEND_PING, &connection->flags);
b411b363
PR
5507 continue;
5508 } else if (rv == -EINTR) {
5509 continue;
5510 } else {
1ec861eb 5511 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5512 goto reconnect;
5513 }
5514
5515 if (received == expect && cmd == NULL) {
bde89a9e 5516 if (decode_header(connection, connection->meta.rbuf, &pi))
b411b363 5517 goto reconnect;
7201b972 5518 cmd = &asender_tbl[pi.cmd];
1952e916 5519 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
1ec861eb 5520 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
2fcb8f30 5521 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5522 goto disconnect;
5523 }
e658983a 5524 expect = header_size + cmd->pkt_size;
52b061a4 5525 if (pi.size != expect - header_size) {
1ec861eb 5526 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5527 pi.cmd, pi.size);
b411b363 5528 goto reconnect;
257d0af6 5529 }
b411b363
PR
5530 }
5531 if (received == expect) {
2735a594 5532 bool err;
a4fbda8e 5533
bde89a9e 5534 err = cmd->fn(connection, &pi);
2735a594 5535 if (err) {
1ec861eb 5536 drbd_err(connection, "%pf failed\n", cmd->fn);
b411b363 5537 goto reconnect;
1952e916 5538 }
b411b363 5539
bde89a9e 5540 connection->last_received = jiffies;
f36af18c 5541
44ed167d
PR
5542 if (cmd == &asender_tbl[P_PING_ACK]) {
5543 /* restore idle timeout */
bde89a9e 5544 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
44ed167d
PR
5545 ping_timeout_active = false;
5546 }
f36af18c 5547
bde89a9e 5548 buf = connection->meta.rbuf;
b411b363 5549 received = 0;
52b061a4 5550 expect = header_size;
b411b363
PR
5551 cmd = NULL;
5552 }
5553 }
5554
5555 if (0) {
5556reconnect:
bde89a9e
AG
5557 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5558 conn_md_sync(connection);
b411b363
PR
5559 }
5560 if (0) {
5561disconnect:
bde89a9e 5562 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5563 }
bde89a9e 5564 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363 5565
1ec861eb 5566 drbd_info(connection, "asender terminated\n");
b411b363
PR
5567
5568 return 0;
5569}
This page took 1.399126 seconds and 5 git commands to generate.