/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break;	/* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

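	/* Here "page" is the n-th page of the chain and "tmp" its successor:
	 * the (n+1)-th page, or NULL if the chain held exactly n pages.
	 * Either way, tmp is what remains for the pool head below. */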
	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first unfinished one, we
	   can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

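		/* Throttle: sleep up to 100ms.  Only if the full timeout
		 * elapses without a wake-up on drbd_pp_wait do we stop
		 * honoring max-buffers for this allocation; as the comment
		 * above explains, max-buffers is deliberately not a hard
		 * limit. */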
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

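/* Thin wrapper around kernel_recvmsg(): returns the number of bytes
 * received, or a negative errno.  With flags == 0 the defaults apply:
 * block until the whole buffer is filled (MSG_WAITALL), and request
 * MSG_NOSIGNAL so a vanished peer does not raise SIGPIPE. */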
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

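	/* Hook our own callback into the listen socket's sk_state_change,
	 * so that an incoming connection completes ad->door_bell and wakes
	 * drbd_wait_for_connect().  The original callback is saved and
	 * chained, and restored later in unregister_state_change(). */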
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static bool drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

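/* Wait out the handshake grace period (sock_check_timeo, or ping_timeo as
 * fallback), then probe both sockets with a non-destructive MSG_PEEK read
 * (drbd_socket_okay) to verify that the peer did not tear either
 * connection down in the meantime. */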
static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

933
934 do {
2bf89621 935 struct socket *s;
b411b363 936
bde89a9e 937 s = drbd_try_connect(connection);
b411b363 938 if (s) {
7da35862
PR
939 if (!sock.socket) {
940 sock.socket = s;
bde89a9e 941 send_first_packet(connection, &sock, P_INITIAL_DATA);
7da35862 942 } else if (!msock.socket) {
bde89a9e 943 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 944 msock.socket = s;
bde89a9e 945 send_first_packet(connection, &msock, P_INITIAL_META);
b411b363 946 } else {
1ec861eb 947 drbd_err(connection, "Logic error in conn_connect()\n");
b411b363
PR
948 goto out_release_sockets;
949 }
950 }
951
5d0b17f1
PR
952 if (connection_established(connection, &sock.socket, &msock.socket))
953 break;
b411b363
PR
954
955retry:
bde89a9e 956 s = drbd_wait_for_connect(connection, &ad);
b411b363 957 if (s) {
bde89a9e 958 int fp = receive_first_packet(connection, s);
7da35862
PR
959 drbd_socket_okay(&sock.socket);
960 drbd_socket_okay(&msock.socket);
92f14951 961 switch (fp) {
e5d6f33a 962 case P_INITIAL_DATA:
7da35862 963 if (sock.socket) {
1ec861eb 964 drbd_warn(connection, "initial packet S crossed\n");
7da35862 965 sock_release(sock.socket);
80c6eed4
PR
966 sock.socket = s;
967 goto randomize;
b411b363 968 }
7da35862 969 sock.socket = s;
b411b363 970 break;
e5d6f33a 971 case P_INITIAL_META:
bde89a9e 972 set_bit(RESOLVE_CONFLICTS, &connection->flags);
7da35862 973 if (msock.socket) {
1ec861eb 974 drbd_warn(connection, "initial packet M crossed\n");
7da35862 975 sock_release(msock.socket);
80c6eed4
PR
976 msock.socket = s;
977 goto randomize;
b411b363 978 }
7da35862 979 msock.socket = s;
b411b363
PR
980 break;
981 default:
1ec861eb 982 drbd_warn(connection, "Error receiving initial packet\n");
b411b363 983 sock_release(s);
80c6eed4 984randomize:
38b682b2 985 if (prandom_u32() & 1)
b411b363
PR
986 goto retry;
987 }
988 }
989
bde89a9e 990 if (connection->cstate <= C_DISCONNECTING)
b411b363
PR
991 goto out_release_sockets;
992 if (signal_pending(current)) {
993 flush_signals(current);
994 smp_rmb();
bde89a9e 995 if (get_t_state(&connection->receiver) == EXITING)
b411b363
PR
996 goto out_release_sockets;
997 }
998
5d0b17f1 999 ok = connection_established(connection, &sock.socket, &msock.socket);
b666dbf8 1000 } while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

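/* Three header formats coexist on the wire, distinguished by their magic
 * value and selected via the agreed protocol version (drbd_header_size):
 * p_header100 (protocol >= 100, carries a volume number), p_header95
 * ("big" packets with a 32-bit length), and the original p_header80. */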
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}

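/* Degrade the requested write ordering method to the strongest one the
 * backing device's configuration still allows: flush -> drain -> none. */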
static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;

	return wo;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource:	DRBD resource.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
	return idr_find(&connection->peer_devices, volume_number);
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
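	/* Depending on the write ordering policy we may need a fresh epoch
	 * object below.  It is allocated with GFP_NOIO: we are in the
	 * writeout path of the peer, and must not trigger arbitrary
	 * write-out ourselves. */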
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

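	/* If peer data integrity checking is configured (peer_integrity_tfm),
	 * the payload on the wire is preceded by a digest: receive it into
	 * dig_in here and verify it against the recomputed dig_vv once the
	 * data has been read.  P_TRIM packets carry no payload, hence no
	 * digest. */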
88104ca4 1593 dgs = 0;
a0fb3c47 1594 if (!trim && peer_device->connection->peer_integrity_tfm) {
69a22773 1595 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
9f5bdc33
AG
1596 /*
1597 * FIXME: Receive the incoming digest into the receive buffer
1598 * here, together with its struct p_data?
1599 */
69a22773 1600 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
a5c31904 1601 if (err)
b411b363 1602 return NULL;
88104ca4 1603 data_size -= dgs;
b411b363
PR
1604 }
1605
a0fb3c47
LE
1606 if (trim) {
1607 D_ASSERT(peer_device, data_size == 0);
1608 data_size = be32_to_cpu(trim->size);
1609 }
1610
841ce241
AG
1611 if (!expect(IS_ALIGNED(data_size, 512)))
1612 return NULL;
a0fb3c47
LE
1613 /* prepare for larger trim requests. */
1614 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
841ce241 1615 return NULL;
b411b363 1616
6666032a
LE
1617 /* even though we trust out peer,
1618 * we sometimes have to double check. */
1619 if (sector + (data_size>>9) > capacity) {
d0180171 1620 drbd_err(device, "request from peer beyond end of local disk: "
fdda6544 1621 "capacity: %llus < sector: %llus + size: %u\n",
1622 (unsigned long long)capacity,
1623 (unsigned long long)sector, data_size);
1624 return NULL;
1625 }
1626
1627 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1628 * "criss-cross" setup, that might cause write-out on some other DRBD,
1629 * which in turn might block on the other node at this very place. */
a0fb3c47 1630 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
db830c46 1631 if (!peer_req)
b411b363 1632 return NULL;
45bb912b 1633
21ae5d7f 1634 peer_req->flags |= EE_WRITE;
a0fb3c47 1635 if (trim)
81a3537a 1636 return peer_req;
a73ff323 1637
b411b363 1638 ds = data_size;
db830c46 1639 page = peer_req->pages;
1640 page_chain_for_each(page) {
1641 unsigned len = min_t(int, ds, PAGE_SIZE);
6b4388ac 1642 data = kmap(page);
69a22773 1643 err = drbd_recv_all_warn(peer_device->connection, data, len);
b30ab791 1644 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
d0180171 1645 drbd_err(device, "Fault injection: Corrupting data on receive\n");
1646 data[0] = data[0] ^ (unsigned long)-1;
1647 }
b411b363 1648 kunmap(page);
a5c31904 1649 if (err) {
b30ab791 1650 drbd_free_peer_req(device, peer_req);
1651 return NULL;
1652 }
a5c31904 1653 ds -= len;
1654 }
1655
1656 if (dgs) {
69a22773 1657 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
b411b363 1658 if (memcmp(dig_in, dig_vv, dgs)) {
d0180171 1659 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
470be44a 1660 (unsigned long long)sector, data_size);
b30ab791 1661 drbd_free_peer_req(device, peer_req);
1662 return NULL;
1663 }
1664 }
b30ab791 1665 device->recv_cnt += data_size>>9;
db830c46 1666 return peer_req;
1667}
1668
1669/* drbd_drain_block() just takes a data block
1670 * out of the socket input buffer, and discards it.
1671 */
69a22773 1672static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1673{
1674 struct page *page;
a5c31904 1675 int err = 0;
1676 void *data;
1677
c3470cde 1678 if (!data_size)
fc5be839 1679 return 0;
c3470cde 1680
69a22773 1681 page = drbd_alloc_pages(peer_device, 1, 1);
1682
1683 data = kmap(page);
1684 while (data_size) {
1685 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1686
69a22773 1687 err = drbd_recv_all_warn(peer_device->connection, data, len);
a5c31904 1688 if (err)
b411b363 1689 break;
a5c31904 1690 data_size -= len;
1691 }
1692 kunmap(page);
69a22773 1693 drbd_free_pages(peer_device->device, page, 0);
fc5be839 1694 return err;
1695}
1696
69a22773 1697static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
1698 sector_t sector, int data_size)
1699{
1700 struct bio_vec bvec;
1701 struct bvec_iter iter;
b411b363 1702 struct bio *bio;
7988613b 1703 int dgs, err, expect;
1704 void *dig_in = peer_device->connection->int_dig_in;
1705 void *dig_vv = peer_device->connection->int_dig_vv;
b411b363 1706
88104ca4 1707 dgs = 0;
1708 if (peer_device->connection->peer_integrity_tfm) {
1709 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1710 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
1711 if (err)
1712 return err;
88104ca4 1713 data_size -= dgs;
1714 }
1715
1716 /* optimistically update recv_cnt. if receiving fails below,
1717 * we disconnect anyways, and counters will be reset. */
69a22773 1718 peer_device->device->recv_cnt += data_size>>9;
1719
1720 bio = req->master_bio;
69a22773 1721 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
b411b363 1722
1723 bio_for_each_segment(bvec, bio, iter) {
1724 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1725 expect = min_t(int, data_size, bvec.bv_len);
69a22773 1726 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
7988613b 1727 kunmap(bvec.bv_page);
1728 if (err)
1729 return err;
1730 data_size -= expect;
1731 }
1732
1733 if (dgs) {
69a22773 1734 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
b411b363 1735 if (memcmp(dig_in, dig_vv, dgs)) {
69a22773 1736 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
28284cef 1737 return -EINVAL;
1738 }
1739 }
1740
69a22773 1741 D_ASSERT(peer_device->device, data_size == 0);
28284cef 1742 return 0;
1743}
1744
1745/*
1746 * e_end_resync_block() is called in asender context via
1747 * drbd_finish_peer_reqs().
1748 */
99920dc5 1749static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1750{
8050e6d0 1751 struct drbd_peer_request *peer_req =
1752 container_of(w, struct drbd_peer_request, w);
1753 struct drbd_peer_device *peer_device = peer_req->peer_device;
1754 struct drbd_device *device = peer_device->device;
db830c46 1755 sector_t sector = peer_req->i.sector;
99920dc5 1756 int err;
b411b363 1757
0b0ba1ef 1758 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1759
db830c46 1760 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b30ab791 1761 drbd_set_in_sync(device, sector, peer_req->i.size);
a8cd15ba 1762 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
1763 } else {
1764 /* Record failure to sync */
b30ab791 1765 drbd_rs_failed_io(device, sector, peer_req->i.size);
b411b363 1766
a8cd15ba 1767 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
b411b363 1768 }
b30ab791 1769 dec_unacked(device);
b411b363 1770
99920dc5 1771 return err;
1772}
1773
69a22773 1774static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
a0fb3c47 1775 struct packet_info *pi) __releases(local)
b411b363 1776{
69a22773 1777 struct drbd_device *device = peer_device->device;
db830c46 1778 struct drbd_peer_request *peer_req;
b411b363 1779
a0fb3c47 1780 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
db830c46 1781 if (!peer_req)
45bb912b 1782 goto fail;
b411b363 1783
b30ab791 1784 dec_rs_pending(device);
b411b363 1785
b30ab791 1786 inc_unacked(device);
1787 /* corresponding dec_unacked() in e_end_resync_block()
1788 * respectively in _drbd_clear_done_ee */
1789
a8cd15ba 1790 peer_req->w.cb = e_end_resync_block;
21ae5d7f 1791 peer_req->submit_jif = jiffies;
45bb912b 1792
0500813f 1793 spin_lock_irq(&device->resource->req_lock);
b9ed7080 1794 list_add_tail(&peer_req->w.list, &device->sync_ee);
0500813f 1795 spin_unlock_irq(&device->resource->req_lock);
b411b363 1796
a0fb3c47 1797 atomic_add(pi->size >> 9, &device->rs_sect_ev);
b30ab791 1798 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
e1c1b0fc 1799 return 0;
b411b363 1800
10f6d992 1801 /* don't care for the reason here */
d0180171 1802 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 1803 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 1804 list_del(&peer_req->w.list);
0500813f 1805 spin_unlock_irq(&device->resource->req_lock);
22cc37a9 1806
b30ab791 1807 drbd_free_peer_req(device, peer_req);
45bb912b 1808fail:
b30ab791 1809 put_ldev(device);
e1c1b0fc 1810 return -EIO;
1811}
1812
668eebc6 1813static struct drbd_request *
b30ab791 1814find_request(struct drbd_device *device, struct rb_root *root, u64 id,
bc9c5c41 1815 sector_t sector, bool missing_ok, const char *func)
51624585 1816{
1817 struct drbd_request *req;
1818
1819 /* Request object according to our peer */
1820 req = (struct drbd_request *)(unsigned long)id;
5e472264 1821 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1822 return req;
c3afd8f5 1823 if (!missing_ok) {
d0180171 1824 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
1825 (unsigned long)id, (unsigned long long)sector);
1826 }
51624585 1827 return NULL;
1828}
1829
bde89a9e 1830static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1831{
9f4fe9ad 1832 struct drbd_peer_device *peer_device;
b30ab791 1833 struct drbd_device *device;
1834 struct drbd_request *req;
1835 sector_t sector;
82bc0194 1836 int err;
e658983a 1837 struct p_data *p = pi->data;
4a76b161 1838
1839 peer_device = conn_peer_device(connection, pi->vnr);
1840 if (!peer_device)
4a76b161 1841 return -EIO;
9f4fe9ad 1842 device = peer_device->device;
1843
1844 sector = be64_to_cpu(p->sector);
1845
0500813f 1846 spin_lock_irq(&device->resource->req_lock);
b30ab791 1847 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
0500813f 1848 spin_unlock_irq(&device->resource->req_lock);
c3afd8f5 1849 if (unlikely(!req))
82bc0194 1850 return -EIO;
b411b363 1851
24c4830c 1852 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
1853 * special casing it there for the various failure cases.
1854 * still no race with drbd_fail_pending_reads */
69a22773 1855 err = recv_dless_read(peer_device, req, sector, pi->size);
82bc0194 1856 if (!err)
8554df1c 1857 req_mod(req, DATA_RECEIVED);
1858 /* else: nothing. handled from drbd_disconnect...
1859 * I don't think we may complete this just yet
1860 * in case we are "on-disconnect: freeze" */
1861
82bc0194 1862 return err;
1863}
1864
bde89a9e 1865static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 1866{
9f4fe9ad 1867 struct drbd_peer_device *peer_device;
b30ab791 1868 struct drbd_device *device;
b411b363 1869 sector_t sector;
82bc0194 1870 int err;
e658983a 1871 struct p_data *p = pi->data;
4a76b161 1872
1873 peer_device = conn_peer_device(connection, pi->vnr);
1874 if (!peer_device)
4a76b161 1875 return -EIO;
9f4fe9ad 1876 device = peer_device->device;
1877
1878 sector = be64_to_cpu(p->sector);
0b0ba1ef 1879 D_ASSERT(device, p->block_id == ID_SYNCER);
b411b363 1880
b30ab791 1881 if (get_ldev(device)) {
1882 /* data is submitted to disk within recv_resync_read.
1883 * corresponding put_ldev done below on error,
fcefa62e 1884 * or in drbd_peer_request_endio. */
a0fb3c47 1885 err = recv_resync_read(peer_device, sector, pi);
1886 } else {
1887 if (__ratelimit(&drbd_ratelimit_state))
d0180171 1888 drbd_err(device, "Can not write resync data to local disk.\n");
b411b363 1889
69a22773 1890 err = drbd_drain_block(peer_device, pi->size);
b411b363 1891
69a22773 1892 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
1893 }
1894
b30ab791 1895 atomic_add(pi->size >> 9, &device->rs_sect_in);
778f271d 1896
82bc0194 1897 return err;
1898}
1899
b30ab791 1900static void restart_conflicting_writes(struct drbd_device *device,
7be8da07 1901 sector_t sector, int size)
b411b363 1902{
1903 struct drbd_interval *i;
1904 struct drbd_request *req;
1905
b30ab791 1906 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
1907 if (!i->local)
1908 continue;
1909 req = container_of(i, struct drbd_request, i);
1910 if (req->rq_state & RQ_LOCAL_PENDING ||
1911 !(req->rq_state & RQ_POSTPONED))
1912 continue;
1913 /* as it is RQ_POSTPONED, this will cause it to
1914 * be queued on the retry workqueue. */
d4dabbe2 1915 __req_mod(req, CONFLICT_RESOLVED, NULL);
1916 }
1917}
b411b363 1918
1919/*
1920 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1921 */
99920dc5 1922static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1923{
8050e6d0 1924 struct drbd_peer_request *peer_req =
1925 container_of(w, struct drbd_peer_request, w);
1926 struct drbd_peer_device *peer_device = peer_req->peer_device;
1927 struct drbd_device *device = peer_device->device;
db830c46 1928 sector_t sector = peer_req->i.sector;
99920dc5 1929 int err = 0, pcmd;
b411b363 1930
303d1448 1931 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1932 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1933 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1934 device->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1935 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1936 P_RS_WRITE_ACK : P_WRITE_ACK;
a8cd15ba 1937 err = drbd_send_ack(peer_device, pcmd, peer_req);
b411b363 1938 if (pcmd == P_RS_WRITE_ACK)
b30ab791 1939 drbd_set_in_sync(device, sector, peer_req->i.size);
b411b363 1940 } else {
a8cd15ba 1941 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
1942 /* we expect it to be marked out of sync anyways...
1943 * maybe assert this? */
1944 }
b30ab791 1945 dec_unacked(device);
b411b363 1946 }
08d0dabf 1947
1948 /* we delete from the conflict detection hash _after_ we sent out the
1949 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1950 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
0500813f 1951 spin_lock_irq(&device->resource->req_lock);
0b0ba1ef 1952 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
b30ab791 1953 drbd_remove_epoch_entry_interval(device, peer_req);
7be8da07 1954 if (peer_req->flags & EE_RESTART_REQUESTS)
b30ab791 1955 restart_conflicting_writes(device, sector, peer_req->i.size);
0500813f 1956 spin_unlock_irq(&device->resource->req_lock);
bb3bfe96 1957 } else
0b0ba1ef 1958 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
b411b363 1959
a6b32bc3 1960 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1961
99920dc5 1962 return err;
1963}
1964
a8cd15ba 1965static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1966{
8050e6d0 1967 struct drbd_peer_request *peer_req =
1968 container_of(w, struct drbd_peer_request, w);
1969 struct drbd_peer_device *peer_device = peer_req->peer_device;
99920dc5 1970 int err;
b411b363 1971
1972 err = drbd_send_ack(peer_device, ack, peer_req);
1973 dec_unacked(peer_device->device);
b411b363 1974
99920dc5 1975 return err;
1976}
1977
d4dabbe2 1978static int e_send_superseded(struct drbd_work *w, int unused)
7be8da07 1979{
a8cd15ba 1980 return e_send_ack(w, P_SUPERSEDED);
1981}
1982
99920dc5 1983static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07 1984{
1985 struct drbd_peer_request *peer_req =
1986 container_of(w, struct drbd_peer_request, w);
1987 struct drbd_connection *connection = peer_req->peer_device->connection;
7be8da07 1988
a8cd15ba 1989 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
d4dabbe2 1990 P_RETRY_WRITE : P_SUPERSEDED);
7be8da07 1991}
b411b363 1992
1993static bool seq_greater(u32 a, u32 b)
1994{
1995 /*
1996 * We assume 32-bit wrap-around here.
1997 * For 24-bit wrap-around, we would have to shift:
1998 * a <<= 8; b <<= 8;
1999 */
2000 return (s32)a - (s32)b > 0;
2001}
b411b363 2002
2003static u32 seq_max(u32 a, u32 b)
2004{
2005 return seq_greater(a, b) ? a : b;
2006}
2007
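The cast-to-signed comparison above is the whole trick: taking the unsigned difference and interpreting it as a signed 32-bit value keeps the ordering correct across a counter wrap. A minimal userspace sketch (editor's illustration, not part of drbd; the local seq_greater mirrors the kernel helper using the well-defined (int32_t)(a - b) formulation):

#include <assert.h>
#include <stdint.h>

static int seq_greater(uint32_t a, uint32_t b)
{
	/* true iff a was assigned after b, assuming the two are
	 * less than 2^31 steps apart */
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	assert(seq_greater(2, 1));               /* ordinary case */
	assert(seq_greater(5, UINT32_MAX - 5));  /* 5 came 11 steps after the wrap */
	assert(!(5 > UINT32_MAX - 5));           /* a naive compare gets this backwards */
	return 0;
}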
69a22773 2008static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
3e394da1 2009{
69a22773 2010 struct drbd_device *device = peer_device->device;
3c13b680 2011 unsigned int newest_peer_seq;
3e394da1 2012
69a22773 2013 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2014 spin_lock(&device->peer_seq_lock);
2015 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2016 device->peer_seq = newest_peer_seq;
2017 spin_unlock(&device->peer_seq_lock);
2018 /* wake up only if we actually changed device->peer_seq */
3c13b680 2019 if (peer_seq == newest_peer_seq)
b30ab791 2020 wake_up(&device->seq_wait);
7be8da07 2021 }
2022}
2023
d93f6302 2024static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
b6a370ba 2025{
2026 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2027}
b6a370ba 2028
d93f6302 2029/* maybe change sync_ee into interval trees as well? */
b30ab791 2030static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2031{
2032 struct drbd_peer_request *rs_req;
2033 bool rv = 0;
2034
0500813f 2035 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2036 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2037 if (overlaps(peer_req->i.sector, peer_req->i.size,
2038 rs_req->i.sector, rs_req->i.size)) {
2039 rv = 1;
2040 break;
2041 }
2042 }
0500813f 2043 spin_unlock_irq(&device->resource->req_lock);
2044
2045 return rv;
2046}
2047
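Note that overlaps() above mixes units on purpose: s1 and s2 are sector numbers while l1 and l2 are byte lengths, hence the >>9 to convert to 512-byte sectors. A self-contained sketch of the same disjointness test (editor's illustration; the sample values are made up):

#include <assert.h>
#include <stdbool.h>

typedef unsigned long long sector_t;

static bool overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	/* ranges overlap unless one ends at or before the other starts */
	return !((s1 + (l1 >> 9) <= s2) || (s1 >= s2 + (l2 >> 9)));
}

int main(void)
{
	assert(overlaps(0, 4096, 4, 4096));  /* sectors [0,8) vs [4,12): overlap */
	assert(!overlaps(0, 4096, 8, 4096)); /* sectors [0,8) vs [8,16): disjoint */
	return 0;
}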
2048/* Called from receive_Data.
2049 * Synchronize packets on sock with packets on msock.
2050 *
2051 * This is here so that even when a P_DATA packet traveling via sock overtakes an Ack
2052 * packet traveling on msock, they are still processed in the order they have
2053 * been sent.
2054 *
2055 * Note: we don't care for Ack packets overtaking P_DATA packets.
2056 *
b30ab791 2057 * In case packet_seq is larger than device->peer_seq number, there are
b411b363 2058 * outstanding packets on the msock. We wait for them to arrive.
b30ab791 2059 * In case we are the logically next packet, we update device->peer_seq
2060 * ourselves. Correctly handles 32bit wrap around.
2061 *
2062 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2063 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2064 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2065 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2066 *
2067 * returns 0 if we may process the packet,
2068 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
69a22773 2069static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
b411b363 2070{
69a22773 2071 struct drbd_device *device = peer_device->device;
b411b363 2072 DEFINE_WAIT(wait);
b411b363 2073 long timeout;
b874d231 2074 int ret = 0, tp;
69a22773 2076 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
2077 return 0;
2078
b30ab791 2079 spin_lock(&device->peer_seq_lock);
b411b363 2080 for (;;) {
2081 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2082 device->peer_seq = seq_max(device->peer_seq, peer_seq);
b411b363 2083 break;
7be8da07 2084 }
b874d231 2085
2086 if (signal_pending(current)) {
2087 ret = -ERESTARTSYS;
2088 break;
2089 }
2090
2091 rcu_read_lock();
a6b32bc3 2092 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
2093 rcu_read_unlock();
2094
2095 if (!tp)
2096 break;
2097
2098 /* Only need to wait if two_primaries is enabled */
2099 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2100 spin_unlock(&device->peer_seq_lock);
44ed167d 2101 rcu_read_lock();
69a22773 2102 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
44ed167d 2103 rcu_read_unlock();
71b1c1eb 2104 timeout = schedule_timeout(timeout);
b30ab791 2105 spin_lock(&device->peer_seq_lock);
7be8da07 2106 if (!timeout) {
b411b363 2107 ret = -ETIMEDOUT;
d0180171 2108 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
2109 break;
2110 }
2111 }
2112 spin_unlock(&device->peer_seq_lock);
2113 finish_wait(&device->seq_wait, &wait);
2114 return ret;
2115}
2116
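The admission test at the top of that loop is compact enough to misread: a write may be processed once its sequence number is at most one ahead of the locally recorded peer_seq, with wrap-around handled by seq_greater(). A hedged userspace sketch of just the predicate (editor's illustration, reusing the seq_greater shown earlier):

#include <assert.h>
#include <stdint.h>

static int seq_greater(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

/* may this P_DATA packet be processed, or are acks still in flight? */
static int may_process(uint32_t packet_seq, uint32_t local_peer_seq)
{
	return !seq_greater(packet_seq - 1, local_peer_seq);
}

int main(void)
{
	assert(may_process(11, 10));        /* logically next packet */
	assert(may_process(10, 10));        /* old/duplicate: no need to wait */
	assert(!may_process(13, 10));       /* 11 and 12 still on the msock */
	assert(may_process(0, UINT32_MAX)); /* next packet across the wrap */
	return 0;
}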
2117/* see also bio_flags_to_wire()
2118 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2119 * flags and back. We may replicate to other kernel versions. */
81f0ffd2 2120static unsigned long wire_flags_to_bio(u32 dpf)
76d2e7ec 2121{
2122 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2123 (dpf & DP_FUA ? REQ_FUA : 0) |
2124 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2125 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
2126}
2127
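wire_flags_to_bio() and its counterpart bio_flags_to_wire() exist so the flag bits on the wire stay stable even when the kernel's REQ_* values change between versions. A hedged sketch of the same idea with made-up bit values (editor's illustration; the real DP_* and REQ_* constants live in the drbd headers and the block layer, not here):

#include <assert.h>
#include <stdint.h>

/* illustrative stand-ins, NOT the real kernel values */
#define DP_RW_SYNC 1u
#define DP_FUA     2u
#define REQ_SYNC   0x10u
#define REQ_FUA    0x80u

static unsigned long wire_flags_to_bio(uint32_t dpf)
{
	return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
	       (dpf & DP_FUA ? REQ_FUA : 0);
}

static uint32_t bio_flags_to_wire(unsigned long rw)
{
	return (rw & REQ_SYNC ? DP_RW_SYNC : 0) |
	       (rw & REQ_FUA ? DP_FUA : 0);
}

int main(void)
{
	/* the mapping must round-trip for replication to be faithful */
	unsigned long rw = REQ_SYNC | REQ_FUA;
	assert(wire_flags_to_bio(bio_flags_to_wire(rw)) == rw);
	return 0;
}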
b30ab791 2128static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
2129 unsigned int size)
2130{
2131 struct drbd_interval *i;
2132
2133 repeat:
b30ab791 2134 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2135 struct drbd_request *req;
2136 struct bio_and_error m;
2137
2138 if (!i->local)
2139 continue;
2140 req = container_of(i, struct drbd_request, i);
2141 if (!(req->rq_state & RQ_POSTPONED))
2142 continue;
2143 req->rq_state &= ~RQ_POSTPONED;
2144 __req_mod(req, NEG_ACKED, &m);
0500813f 2145 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2146 if (m.bio)
b30ab791 2147 complete_master_bio(device, &m);
0500813f 2148 spin_lock_irq(&device->resource->req_lock);
2149 goto repeat;
2150 }
2151}
2152
b30ab791 2153static int handle_write_conflicts(struct drbd_device *device,
2154 struct drbd_peer_request *peer_req)
2155{
e33b32de 2156 struct drbd_connection *connection = peer_req->peer_device->connection;
bde89a9e 2157 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
2158 sector_t sector = peer_req->i.sector;
2159 const unsigned int size = peer_req->i.size;
2160 struct drbd_interval *i;
2161 bool equal;
2162 int err;
2163
2164 /*
2165 * Inserting the peer request into the write_requests tree will prevent
2166 * new conflicting local requests from being added.
2167 */
b30ab791 2168 drbd_insert_interval(&device->write_requests, &peer_req->i);
2169
2170 repeat:
b30ab791 2171 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2172 if (i == &peer_req->i)
2173 continue;
2174 if (i->completed)
2175 continue;
2176
2177 if (!i->local) {
2178 /*
2179 * Our peer has sent a conflicting remote request; this
2180 * should not happen in a two-node setup. Wait for the
2181 * earlier peer request to complete.
2182 */
b30ab791 2183 err = drbd_wait_misc(device, i);
2184 if (err)
2185 goto out;
2186 goto repeat;
2187 }
2188
2189 equal = i->sector == sector && i->size == size;
2190 if (resolve_conflicts) {
2191 /*
2192 * If the peer request is fully contained within the
2193 * overlapping request, it can be considered overwritten
2194 * and thus superseded; otherwise, it will be retried
2195 * once all overlapping requests have completed.
7be8da07 2196 */
d4dabbe2 2197 bool superseded = i->sector <= sector && i->sector +
2198 (i->size >> 9) >= sector + (size >> 9);
2199
2200 if (!equal)
d0180171 2201 drbd_alert(device, "Concurrent writes detected: "
2202 "local=%llus +%u, remote=%llus +%u, "
2203 "assuming %s came first\n",
2204 (unsigned long long)i->sector, i->size,
2205 (unsigned long long)sector, size,
d4dabbe2 2206 superseded ? "local" : "remote");
7be8da07 2207
a8cd15ba 2208 peer_req->w.cb = superseded ? e_send_superseded :
7be8da07 2209 e_send_retry_write;
a8cd15ba 2210 list_add_tail(&peer_req->w.list, &device->done_ee);
e33b32de 2211 wake_asender(connection);
2212
2213 err = -ENOENT;
2214 goto out;
2215 } else {
2216 struct drbd_request *req =
2217 container_of(i, struct drbd_request, i);
2218
2219 if (!equal)
d0180171 2220 drbd_alert(device, "Concurrent writes detected: "
2221 "local=%llus +%u, remote=%llus +%u\n",
2222 (unsigned long long)i->sector, i->size,
2223 (unsigned long long)sector, size);
2224
2225 if (req->rq_state & RQ_LOCAL_PENDING ||
2226 !(req->rq_state & RQ_POSTPONED)) {
2227 /*
2228 * Wait for the node with the discard flag to
2229 * decide if this request has been superseded
2230 * or needs to be retried.
2231 * Requests that have been superseded will
2232 * disappear from the write_requests tree.
2233 *
2234 * In addition, wait for the conflicting
2235 * request to finish locally before submitting
2236 * the conflicting peer request.
2237 */
b30ab791 2238 err = drbd_wait_misc(device, &req->i);
7be8da07 2239 if (err) {
e33b32de 2240 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
b30ab791 2241 fail_postponed_requests(device, sector, size);
2242 goto out;
2243 }
2244 goto repeat;
2245 }
2246 /*
2247 * Remember to restart the conflicting requests after
2248 * the new peer request has completed.
2249 */
2250 peer_req->flags |= EE_RESTART_REQUESTS;
2251 }
2252 }
2253 err = 0;
2254
2255 out:
2256 if (err)
b30ab791 2257 drbd_remove_epoch_entry_interval(device, peer_req);
2258 return err;
2259}
2260
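Which request survives a conflict hinges on the containment test in handle_write_conflicts() above: the peer request counts as superseded only when the overlapping local request fully covers it. A compact sketch of that test (editor's illustration; sizes in bytes, 512-byte sectors):

#include <assert.h>
#include <stdbool.h>

typedef unsigned long long sector_t;

/* does [i_sector, i_sector + i_size) fully contain [sector, sector + size)? */
static bool fully_contains(sector_t i_sector, unsigned int i_size,
			   sector_t sector, unsigned int size)
{
	return i_sector <= sector &&
	       i_sector + (i_size >> 9) >= sector + (size >> 9);
}

int main(void)
{
	assert(fully_contains(0, 8192, 4, 2048));  /* [0,16) covers [4,8): superseded */
	assert(!fully_contains(4, 2048, 0, 8192)); /* [4,8) cannot cover [0,16): retry */
	return 0;
}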
b411b363 2261/* mirrored write */
bde89a9e 2262static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2263{
9f4fe9ad 2264 struct drbd_peer_device *peer_device;
b30ab791 2265 struct drbd_device *device;
21ae5d7f 2266 struct net_conf *nc;
b411b363 2267 sector_t sector;
db830c46 2268 struct drbd_peer_request *peer_req;
e658983a 2269 struct p_data *p = pi->data;
7be8da07 2270 u32 peer_seq = be32_to_cpu(p->seq_num);
2271 int rw = WRITE;
2272 u32 dp_flags;
302bdeae 2273 int err, tp;
b411b363 2274
2275 peer_device = conn_peer_device(connection, pi->vnr);
2276 if (!peer_device)
4a76b161 2277 return -EIO;
9f4fe9ad 2278 device = peer_device->device;
b411b363 2279
b30ab791 2280 if (!get_ldev(device)) {
2281 int err2;
2282
2283 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2284 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
bde89a9e 2285 atomic_inc(&connection->current_epoch->epoch_size);
69a22773 2286 err2 = drbd_drain_block(peer_device, pi->size);
2287 if (!err)
2288 err = err2;
2289 return err;
2290 }
2291
2292 /*
2293 * Corresponding put_ldev done either below (on various errors), or in
2294 * drbd_peer_request_endio, if we successfully submit the data at the
2295 * end of this function.
2296 */
2297
2298 sector = be64_to_cpu(p->sector);
a0fb3c47 2299 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
db830c46 2300 if (!peer_req) {
b30ab791 2301 put_ldev(device);
82bc0194 2302 return -EIO;
2303 }
2304
a8cd15ba 2305 peer_req->w.cb = e_end_block;
2306 peer_req->submit_jif = jiffies;
2307 peer_req->flags |= EE_APPLICATION;
b411b363 2308
688593c5 2309 dp_flags = be32_to_cpu(p->dp_flags);
81f0ffd2 2310 rw |= wire_flags_to_bio(dp_flags);
2311 if (pi->cmd == P_TRIM) {
2312 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2313 peer_req->flags |= EE_IS_TRIM;
2314 if (!blk_queue_discard(q))
2315 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2316 D_ASSERT(peer_device, peer_req->i.size > 0);
2317 D_ASSERT(peer_device, rw & REQ_DISCARD);
2318 D_ASSERT(peer_device, peer_req->pages == NULL);
2319 } else if (peer_req->pages == NULL) {
2320 D_ASSERT(device, peer_req->i.size == 0);
2321 D_ASSERT(device, dp_flags & DP_FLUSH);
a73ff323 2322 }
2323
2324 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2325 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2326
2327 spin_lock(&connection->epoch_lock);
2328 peer_req->epoch = connection->current_epoch;
2329 atomic_inc(&peer_req->epoch->epoch_size);
2330 atomic_inc(&peer_req->epoch->active);
bde89a9e 2331 spin_unlock(&connection->epoch_lock);
b411b363 2332
302bdeae 2333 rcu_read_lock();
2334 nc = rcu_dereference(peer_device->connection->net_conf);
2335 tp = nc->two_primaries;
2336 if (peer_device->connection->agreed_pro_version < 100) {
2337 switch (nc->wire_protocol) {
2338 case DRBD_PROT_C:
2339 dp_flags |= DP_SEND_WRITE_ACK;
2340 break;
2341 case DRBD_PROT_B:
2342 dp_flags |= DP_SEND_RECEIVE_ACK;
2343 break;
2344 }
2345 }
302bdeae 2346 rcu_read_unlock();
2347
2348 if (dp_flags & DP_SEND_WRITE_ACK) {
2349 peer_req->flags |= EE_SEND_WRITE_ACK;
2350 inc_unacked(device);
2351 /* corresponding dec_unacked() in e_end_block()
2352 * respectively in _drbd_clear_done_ee */
2353 }
2354
2355 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2356 /* I really don't like it that the receiver thread
2357 * sends on the msock, but anyways */
2358 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
2359 }
2360
302bdeae 2361 if (tp) {
2362 /* two primaries implies protocol C */
2363 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
302bdeae 2364 peer_req->flags |= EE_IN_INTERVAL_TREE;
69a22773 2365 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
7be8da07 2366 if (err)
b411b363 2367 goto out_interrupted;
0500813f 2368 spin_lock_irq(&device->resource->req_lock);
b30ab791 2369 err = handle_write_conflicts(device, peer_req);
7be8da07 2370 if (err) {
0500813f 2371 spin_unlock_irq(&device->resource->req_lock);
7be8da07 2372 if (err == -ENOENT) {
b30ab791 2373 put_ldev(device);
82bc0194 2374 return 0;
b411b363 2375 }
7be8da07 2376 goto out_interrupted;
b411b363 2377 }
b874d231 2378 } else {
69a22773 2379 update_peer_seq(peer_device, peer_seq);
0500813f 2380 spin_lock_irq(&device->resource->req_lock);
b874d231 2381 }
2382 /* if we use the zeroout fallback code, we process synchronously
2383 * and we wait for all pending requests, respectively wait for
2384 * active_ee to become empty in drbd_submit_peer_request();
2385 * better not add ourselves here. */
2386 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
b9ed7080 2387 list_add_tail(&peer_req->w.list, &device->active_ee);
0500813f 2388 spin_unlock_irq(&device->resource->req_lock);
b411b363 2389
2390 if (device->state.conn == C_SYNC_TARGET)
2391 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
b411b363 2392
b30ab791 2393 if (device->state.pdsk < D_INCONSISTENT) {
b411b363 2394 /* In case we have the only disk of the cluster, */
b30ab791 2395 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
db830c46 2396 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
4dd726f0 2397 drbd_al_begin_io(device, &peer_req->i);
21ae5d7f 2398 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2399 }
2400
b30ab791 2401 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
2402 if (!err)
2403 return 0;
b411b363 2404
10f6d992 2405 /* don't care for the reason here */
d0180171 2406 drbd_err(device, "submit failed, triggering re-connect\n");
0500813f 2407 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2408 list_del(&peer_req->w.list);
b30ab791 2409 drbd_remove_epoch_entry_interval(device, peer_req);
0500813f 2410 spin_unlock_irq(&device->resource->req_lock);
2411 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2412 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
b30ab791 2413 drbd_al_complete_io(device, &peer_req->i);
21ae5d7f 2414 }
22cc37a9 2415
b411b363 2416out_interrupted:
bde89a9e 2417 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
2418 put_ldev(device);
2419 drbd_free_peer_req(device, peer_req);
82bc0194 2420 return err;
2421}
2422
2423/* We may throttle resync, if the lower device seems to be busy,
2424 * and current sync rate is above c_min_rate.
2425 *
2426 * To decide whether or not the lower device is busy, we use a scheme similar
2427 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2428 * (more than 64 sectors) of activity we cannot account for with our own resync
2429 * activity, it obviously is "busy".
2430 *
2431 * The current sync rate used here uses only the most recent two step marks,
2432 * to have a short time average so we can react faster.
2433 */
2434bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2435 bool throttle_if_app_is_waiting)
0f0601f4 2436{
e3555d85 2437 struct lc_element *tmp;
ad3fee79 2438 bool throttle = drbd_rs_c_min_rate_throttle(device);
daeda1cc 2439
2440 if (!throttle || throttle_if_app_is_waiting)
2441 return throttle;
0f0601f4 2442
2443 spin_lock_irq(&device->al_lock);
2444 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2445 if (tmp) {
2446 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2447 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2448 throttle = false;
2449 /* Do not slow down if app IO is already waiting for this extent,
2450 * and our progress is necessary for application IO to complete. */
e3555d85 2451 }
b30ab791 2452 spin_unlock_irq(&device->al_lock);
e3555d85 2453
2454 return throttle;
2455}
2456
2457bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2458{
2459 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
2460 unsigned long db, dt, dbdt;
2461 unsigned int c_min_rate;
2462 int curr_events;
2463
2464 rcu_read_lock();
2465 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2466 rcu_read_unlock();
2467
2468 /* feature disabled? */
2469 if (c_min_rate == 0)
2470 return false;
2471
2472 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2473 (int)part_stat_read(&disk->part0, sectors[1]) -
b30ab791 2474 atomic_read(&device->rs_sect_ev);
2475
2476 if (atomic_read(&device->ap_actlog_cnt)
2477 || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
2478 unsigned long rs_left;
2479 int i;
2480
b30ab791 2481 device->rs_last_events = curr_events;
2482
2483 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2484 * approx. */
b30ab791 2485 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2649f080 2486
2487 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2488 rs_left = device->ov_left;
2649f080 2489 else
b30ab791 2490 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
0f0601f4 2491
b30ab791 2492 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
2493 if (!dt)
2494 dt++;
b30ab791 2495 db = device->rs_mark_left[i] - rs_left;
2496 dbdt = Bit2KB(db/dt);
2497
daeda1cc 2498 if (dbdt > c_min_rate)
e8299874 2499 return true;
0f0601f4 2500 }
e8299874 2501 return false;
2502}
2503
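The throttle decision reduces to a little arithmetic: db is the number of bitmap bits cleared since the most recent sync mark, dt the elapsed seconds, and dbdt = Bit2KB(db/dt) the achieved resync rate in KiB/s, compared against the configured c_min_rate. A worked userspace example (editor's illustration; it assumes DRBD's 4 KiB-per-bitmap-bit granularity, so Bit2KB is a shift by 2):

#include <assert.h>
#include <stdio.h>

#define Bit2KB(bits) ((bits) << 2) /* one bitmap bit covers 4 KiB */

int main(void)
{
	unsigned long rs_mark_left = 1000000; /* bits left at the last mark */
	unsigned long rs_left = 968000;       /* bits left now */
	unsigned long dt = 4;                 /* seconds since that mark */
	unsigned long db = rs_mark_left - rs_left; /* 32000 bits of progress */
	unsigned long dbdt = Bit2KB(db / dt);      /* 8000 * 4 = 32000 KiB/s */
	unsigned int c_min_rate = 25000;      /* KiB/s, from disk_conf */

	assert(dbdt == 32000);
	/* resync is already faster than c_min_rate, so it may be
	 * throttled when unrelated application I/O is detected */
	printf("throttle candidate: %s\n", dbdt > c_min_rate ? "yes" : "no");
	return 0;
}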
bde89a9e 2504static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
b411b363 2505{
9f4fe9ad 2506 struct drbd_peer_device *peer_device;
b30ab791 2507 struct drbd_device *device;
b411b363 2508 sector_t sector;
4a76b161 2509 sector_t capacity;
db830c46 2510 struct drbd_peer_request *peer_req;
b411b363 2511 struct digest_info *di = NULL;
b18b37be 2512 int size, verb;
b411b363 2513 unsigned int fault_type;
e658983a 2514 struct p_block_req *p = pi->data;
4a76b161 2515
2516 peer_device = conn_peer_device(connection, pi->vnr);
2517 if (!peer_device)
4a76b161 2518 return -EIO;
9f4fe9ad 2519 device = peer_device->device;
b30ab791 2520 capacity = drbd_get_capacity(device->this_bdev);
2521
2522 sector = be64_to_cpu(p->sector);
2523 size = be32_to_cpu(p->blksize);
2524
c670a398 2525 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
d0180171 2526 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2527 (unsigned long long)sector, size);
82bc0194 2528 return -EINVAL;
2529 }
2530 if (sector + (size>>9) > capacity) {
d0180171 2531 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
b411b363 2532 (unsigned long long)sector, size);
82bc0194 2533 return -EINVAL;
2534 }
2535
b30ab791 2536 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
b18b37be 2537 verb = 1;
e2857216 2538 switch (pi->cmd) {
b18b37be 2539 case P_DATA_REQUEST:
69a22773 2540 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2541 break;
2542 case P_RS_DATA_REQUEST:
2543 case P_CSUM_RS_REQUEST:
2544 case P_OV_REQUEST:
69a22773 2545 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
2546 break;
2547 case P_OV_REPLY:
2548 verb = 0;
b30ab791 2549 dec_rs_pending(device);
69a22773 2550 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2551 break;
2552 default:
49ba9b1b 2553 BUG();
2554 }
2555 if (verb && __ratelimit(&drbd_ratelimit_state))
d0180171 2556 drbd_err(device, "Can not satisfy peer's read request, "
b411b363 2557 "no local data.\n");
b18b37be 2558
a821cc4a 2559 /* drain possible payload */
69a22773 2560 return drbd_drain_block(peer_device, pi->size);
2561 }
2562
2563 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2564 * "criss-cross" setup, that might cause write-out on some other DRBD,
2565 * which in turn might block on the other node at this very place. */
2566 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2567 true /* has real payload */, GFP_NOIO);
db830c46 2568 if (!peer_req) {
b30ab791 2569 put_ldev(device);
82bc0194 2570 return -ENOMEM;
2571 }
2572
e2857216 2573 switch (pi->cmd) {
b411b363 2574 case P_DATA_REQUEST:
a8cd15ba 2575 peer_req->w.cb = w_e_end_data_req;
b411b363 2576 fault_type = DRBD_FAULT_DT_RD;
80a40e43 2577 /* application IO, don't drbd_rs_begin_io */
21ae5d7f 2578 peer_req->flags |= EE_APPLICATION;
2579 goto submit;
2580
b411b363 2581 case P_RS_DATA_REQUEST:
a8cd15ba 2582 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2583 fault_type = DRBD_FAULT_RS_RD;
5f9915bb 2584 /* used in the sector offset progress display */
b30ab791 2585 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2586 break;
2587
2588 case P_OV_REPLY:
2589 case P_CSUM_RS_REQUEST:
2590 fault_type = DRBD_FAULT_RS_RD;
e2857216 2591 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2592 if (!di)
2593 goto out_free_e;
2594
e2857216 2595 di->digest_size = pi->size;
2596 di->digest = (((char *)di)+sizeof(struct digest_info));
2597
2598 peer_req->digest = di;
2599 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2600
9f4fe9ad 2601 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2602 goto out_free_e;
2603
e2857216 2604 if (pi->cmd == P_CSUM_RS_REQUEST) {
9f4fe9ad 2605 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
a8cd15ba 2606 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb 2607 /* used in the sector offset progress display */
b30ab791 2608 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2609 /* remember to report stats in drbd_resync_finished */
2610 device->use_csums = true;
e2857216 2611 } else if (pi->cmd == P_OV_REPLY) {
2649f080 2612 /* track progress, we may need to throttle */
b30ab791 2613 atomic_add(size >> 9, &device->rs_sect_in);
a8cd15ba 2614 peer_req->w.cb = w_e_end_ov_reply;
b30ab791 2615 dec_rs_pending(device);
2616 /* drbd_rs_begin_io done when we sent this request,
2617 * but accounting still needs to be done. */
2618 goto submit_for_resync;
2619 }
2620 break;
2621
2622 case P_OV_REQUEST:
b30ab791 2623 if (device->ov_start_sector == ~(sector_t)0 &&
9f4fe9ad 2624 peer_device->connection->agreed_pro_version >= 90) {
2625 unsigned long now = jiffies;
2626 int i;
2627 device->ov_start_sector = sector;
2628 device->ov_position = sector;
2629 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2630 device->rs_total = device->ov_left;
de228bba 2631 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2632 device->rs_mark_left[i] = device->ov_left;
2633 device->rs_mark_time[i] = now;
de228bba 2634 }
d0180171 2635 drbd_info(device, "Online Verify start sector: %llu\n",
2636 (unsigned long long)sector);
2637 }
a8cd15ba 2638 peer_req->w.cb = w_e_end_ov_req;
b411b363 2639 fault_type = DRBD_FAULT_RS_RD;
2640 break;
2641
b411b363 2642 default:
49ba9b1b 2643 BUG();
2644 }
2645
2646 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2647 * wrt the receiver, but it is not as straightforward as it may seem.
2648 * Various places in the resync start and stop logic assume resync
2649 * requests are processed in order, requeuing this on the worker thread
2650 * introduces a bunch of new code for synchronization between threads.
2651 *
2652 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2653 * "forever", throttling after drbd_rs_begin_io will lock that extent
2654 * for application writes for the same time. For now, just throttle
2655 * here, where the rest of the code expects the receiver to sleep for
2656 * a while, anyways.
2657 */
2658
2659 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2660 * this defers syncer requests for some time, before letting at least
2661 * one request through. The resync controller on the receiving side
2662 * will adapt to the incoming rate accordingly.
2663 *
2664 * We cannot throttle here if remote is Primary/SyncTarget:
2665 * we would also throttle its application reads.
2666 * In that case, throttling is done on the SyncTarget only.
2667 */
c5a2c150
LE
2668
2669 /* Even though this may be a resync request, we do add to "read_ee";
2670 * "sync_ee" is only used for resync WRITEs.
2671 * Add to list early, so debugfs can find this request
2672 * even if we have to sleep below. */
2673 spin_lock_irq(&device->resource->req_lock);
2674 list_add_tail(&peer_req->w.list, &device->read_ee);
2675 spin_unlock_irq(&device->resource->req_lock);
2676
2677 if (device->state.peer != R_PRIMARY
2678 && drbd_rs_should_slow_down(device, sector, false))
e3555d85 2679 schedule_timeout_uninterruptible(HZ/10);
b30ab791 2680 if (drbd_rs_begin_io(device, sector))
80a40e43 2681 goto out_free_e;
b411b363 2682
0f0601f4 2683submit_for_resync:
b30ab791 2684 atomic_add(size >> 9, &device->rs_sect_ev);
0f0601f4 2685
80a40e43 2686submit:
b30ab791 2687 inc_unacked(device);
b30ab791 2688 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
82bc0194 2689 return 0;
b411b363 2690
10f6d992 2691 /* don't care for the reason here */
d0180171 2692 drbd_err(device, "submit failed, triggering re-connect\n");
2693
2694out_free_e:
0500813f 2695 spin_lock_irq(&device->resource->req_lock);
a8cd15ba 2696 list_del(&peer_req->w.list);
0500813f 2697 spin_unlock_irq(&device->resource->req_lock);
2698 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2699
2700 put_ldev(device);
2701 drbd_free_peer_req(device, peer_req);
82bc0194 2702 return -EIO;
2703}
2704
2705/**
2706 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2707 */
2708static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2709{
69a22773 2710 struct drbd_device *device = peer_device->device;
2711 int self, peer, rv = -100;
2712 unsigned long ch_self, ch_peer;
44ed167d 2713 enum drbd_after_sb_p after_sb_0p;
b411b363 2714
2715 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2716 peer = device->p_uuid[UI_BITMAP] & 1;
b411b363 2717
2718 ch_peer = device->p_uuid[UI_SIZE];
2719 ch_self = device->comm_bm_set;
b411b363 2720
44ed167d 2721 rcu_read_lock();
69a22773 2722 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
2723 rcu_read_unlock();
2724 switch (after_sb_0p) {
2725 case ASB_CONSENSUS:
2726 case ASB_DISCARD_SECONDARY:
2727 case ASB_CALL_HELPER:
44ed167d 2728 case ASB_VIOLENTLY:
d0180171 2729 drbd_err(device, "Configuration error.\n");
2730 break;
2731 case ASB_DISCONNECT:
2732 break;
2733 case ASB_DISCARD_YOUNGER_PRI:
2734 if (self == 0 && peer == 1) {
2735 rv = -1;
2736 break;
2737 }
2738 if (self == 1 && peer == 0) {
2739 rv = 1;
2740 break;
2741 }
2742 /* Else fall through to one of the other strategies... */
2743 case ASB_DISCARD_OLDER_PRI:
2744 if (self == 0 && peer == 1) {
2745 rv = 1;
2746 break;
2747 }
2748 if (self == 1 && peer == 0) {
2749 rv = -1;
2750 break;
2751 }
2752 /* Else fall through to one of the other strategies... */
d0180171 2753 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
2754 "Using discard-least-changes instead\n");
2755 case ASB_DISCARD_ZERO_CHG:
2756 if (ch_peer == 0 && ch_self == 0) {
69a22773 2757 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2758 ? -1 : 1;
2759 break;
2760 } else {
2761 if (ch_peer == 0) { rv = 1; break; }
2762 if (ch_self == 0) { rv = -1; break; }
2763 }
44ed167d 2764 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
2765 break;
2766 case ASB_DISCARD_LEAST_CHG:
2767 if (ch_self < ch_peer)
2768 rv = -1;
2769 else if (ch_self > ch_peer)
2770 rv = 1;
2771 else /* ( ch_self == ch_peer ) */
2772 /* Well, then use something else. */
69a22773 2773 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2774 ? -1 : 1;
2775 break;
2776 case ASB_DISCARD_LOCAL:
2777 rv = -1;
2778 break;
2779 case ASB_DISCARD_REMOTE:
2780 rv = 1;
2781 }
2782
2783 return rv;
2784}
2785
2786/**
2787 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2788 */
2789static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2790{
69a22773 2791 struct drbd_device *device = peer_device->device;
6184ea21 2792 int hg, rv = -100;
44ed167d 2793 enum drbd_after_sb_p after_sb_1p;
b411b363 2794
44ed167d 2795 rcu_read_lock();
69a22773 2796 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
2797 rcu_read_unlock();
2798 switch (after_sb_1p) {
2799 case ASB_DISCARD_YOUNGER_PRI:
2800 case ASB_DISCARD_OLDER_PRI:
2801 case ASB_DISCARD_LEAST_CHG:
2802 case ASB_DISCARD_LOCAL:
2803 case ASB_DISCARD_REMOTE:
44ed167d 2804 case ASB_DISCARD_ZERO_CHG:
d0180171 2805 drbd_err(device, "Configuration error.\n");
2806 break;
2807 case ASB_DISCONNECT:
2808 break;
2809 case ASB_CONSENSUS:
69a22773 2810 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2811 if (hg == -1 && device->state.role == R_SECONDARY)
b411b363 2812 rv = hg;
b30ab791 2813 if (hg == 1 && device->state.role == R_PRIMARY)
2814 rv = hg;
2815 break;
2816 case ASB_VIOLENTLY:
69a22773 2817 rv = drbd_asb_recover_0p(peer_device);
b411b363
PR
2818 break;
2819 case ASB_DISCARD_SECONDARY:
b30ab791 2820 return device->state.role == R_PRIMARY ? 1 : -1;
b411b363 2821 case ASB_CALL_HELPER:
69a22773 2822 hg = drbd_asb_recover_0p(peer_device);
b30ab791 2823 if (hg == -1 && device->state.role == R_PRIMARY) {
2824 enum drbd_state_rv rv2;
2825
2826 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2827 * we might be here in C_WF_REPORT_PARAMS which is transient.
2828 * we do not need to wait for the after state change work either. */
b30ab791 2829 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2830 if (rv2 != SS_SUCCESS) {
b30ab791 2831 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2832 } else {
d0180171 2833 drbd_warn(device, "Successfully gave up primary role.\n");
2834 rv = hg;
2835 }
2836 } else
2837 rv = hg;
2838 }
2839
2840 return rv;
2841}
2842
2843/**
2844 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2845 */
2846static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
b411b363 2847{
69a22773 2848 struct drbd_device *device = peer_device->device;
6184ea21 2849 int hg, rv = -100;
44ed167d 2850 enum drbd_after_sb_p after_sb_2p;
b411b363 2851
44ed167d 2852 rcu_read_lock();
69a22773 2853 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
2854 rcu_read_unlock();
2855 switch (after_sb_2p) {
2856 case ASB_DISCARD_YOUNGER_PRI:
2857 case ASB_DISCARD_OLDER_PRI:
2858 case ASB_DISCARD_LEAST_CHG:
2859 case ASB_DISCARD_LOCAL:
2860 case ASB_DISCARD_REMOTE:
2861 case ASB_CONSENSUS:
2862 case ASB_DISCARD_SECONDARY:
44ed167d 2863 case ASB_DISCARD_ZERO_CHG:
d0180171 2864 drbd_err(device, "Configuration error.\n");
2865 break;
2866 case ASB_VIOLENTLY:
69a22773 2867 rv = drbd_asb_recover_0p(peer_device);
2868 break;
2869 case ASB_DISCONNECT:
2870 break;
2871 case ASB_CALL_HELPER:
69a22773 2872 hg = drbd_asb_recover_0p(peer_device);
b411b363 2873 if (hg == -1) {
2874 enum drbd_state_rv rv2;
2875
2876 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2877 * we might be here in C_WF_REPORT_PARAMS which is transient.
2878 * we do not need to wait for the after state change work either. */
b30ab791 2879 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
bb437946 2880 if (rv2 != SS_SUCCESS) {
b30ab791 2881 drbd_khelper(device, "pri-lost-after-sb");
b411b363 2882 } else {
d0180171 2883 drbd_warn(device, "Successfully gave up primary role.\n");
2884 rv = hg;
2885 }
2886 } else
2887 rv = hg;
2888 }
2889
2890 return rv;
2891}
2892
b30ab791 2893static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
2894 u64 bits, u64 flags)
2895{
2896 if (!uuid) {
d0180171 2897 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
2898 return;
2899 }
d0180171 2900 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2901 text,
2902 (unsigned long long)uuid[UI_CURRENT],
2903 (unsigned long long)uuid[UI_BITMAP],
2904 (unsigned long long)uuid[UI_HISTORY_START],
2905 (unsigned long long)uuid[UI_HISTORY_END],
2906 (unsigned long long)bits,
2907 (unsigned long long)flags);
2908}
2909
2910/*
2911 100 after split brain try auto recover
2912 2 C_SYNC_SOURCE set BitMap
2913 1 C_SYNC_SOURCE use BitMap
2914 0 no Sync
2915 -1 C_SYNC_TARGET use BitMap
2916 -2 C_SYNC_TARGET set BitMap
2917 -100 after split brain, disconnect
2918-1000 unrelated data
2919-1091 requires proto 91
2920-1096 requires proto 96
b411b363 2921 */
44a4d551 2922static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
b411b363 2923{
2924 struct drbd_peer_device *const peer_device = first_peer_device(device);
2925 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
2926 u64 self, peer;
2927 int i, j;
2928
2929 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2930 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
2931
2932 *rule_nr = 10;
2933 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2934 return 0;
2935
2936 *rule_nr = 20;
2937 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2938 peer != UUID_JUST_CREATED)
2939 return -2;
2940
2941 *rule_nr = 30;
2942 if (self != UUID_JUST_CREATED &&
2943 (peer == UUID_JUST_CREATED || peer == (u64)0))
2944 return 2;
2945
2946 if (self == peer) {
2947 int rct, dc; /* roles at crash time */
2948
b30ab791 2949 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
b411b363 2950
44a4d551 2951 if (connection->agreed_pro_version < 91)
4a23f264 2952 return -1091;
b411b363 2953
2954 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2955 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
d0180171 2956 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
2957 drbd_uuid_move_history(device);
2958 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2959 device->ldev->md.uuid[UI_BITMAP] = 0;
b411b363 2960
2961 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2962 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
2963 *rule_nr = 34;
2964 } else {
d0180171 2965 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
2966 *rule_nr = 36;
2967 }
2968
2969 return 1;
2970 }
2971
b30ab791 2972 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
b411b363 2973
44a4d551 2974 if (connection->agreed_pro_version < 91)
4a23f264 2975 return -1091;
b411b363 2976
2977 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2978 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
d0180171 2979 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
b411b363 2980
2981 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2982 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2983 device->p_uuid[UI_BITMAP] = 0UL;
b411b363 2984
b30ab791 2985 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
2986 *rule_nr = 35;
2987 } else {
d0180171 2988 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
2989 *rule_nr = 37;
2990 }
2991
2992 return -1;
2993 }
2994
2995 /* Common power [off|failure] */
2996 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2997 (device->p_uuid[UI_FLAGS] & 2);
2998 /* lowest bit is set when we were primary,
2999 * next bit (weight 2) is set when peer was primary */
3000 *rule_nr = 40;
3001
3002 switch (rct) {
3003 case 0: /* !self_pri && !peer_pri */ return 0;
3004 case 1: /* self_pri && !peer_pri */ return 1;
3005 case 2: /* !self_pri && peer_pri */ return -1;
3006 case 3: /* self_pri && peer_pri */
44a4d551 3007 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3008 return dc ? -1 : 1;
3009 }
3010 }
3011
3012 *rule_nr = 50;
b30ab791 3013 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3014 if (self == peer)
3015 return -1;
3016
3017 *rule_nr = 51;
b30ab791 3018 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 3019 if (self == peer) {
44a4d551 3020 if (connection->agreed_pro_version < 96 ?
3021 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3022 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3023 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3024 /* The last P_SYNC_UUID did not get through. Undo the last start of
3025 resync as sync source modifications of the peer's UUIDs. */
3026
44a4d551 3027 if (connection->agreed_pro_version < 91)
4a23f264 3028 return -1091;
b411b363 3029
3030 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3031 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
4a23f264 3032
d0180171 3033 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
b30ab791 3034 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
4a23f264 3035
3036 return -1;
3037 }
3038 }
3039
3040 *rule_nr = 60;
b30ab791 3041 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
b411b363 3042 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3043 peer = device->p_uuid[i] & ~((u64)1);
3044 if (self == peer)
3045 return -2;
3046 }
3047
3048 *rule_nr = 70;
3049 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3050 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3051 if (self == peer)
3052 return 1;
3053
3054 *rule_nr = 71;
b30ab791 3055 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
b411b363 3056 if (self == peer) {
44a4d551 3057 if (connection->agreed_pro_version < 96 ?
3058 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3059 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3060 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3061 /* The last P_SYNC_UUID did not get through. Undo the last start of
3062 resync as sync source modifications of our UUIDs. */
3063
44a4d551 3064 if (connection->agreed_pro_version < 91)
4a23f264 3065 return -1091;
b411b363 3066
3067 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3068 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
b411b363 3069
d0180171 3070 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3071 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3072 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3073
3074 return 1;
3075 }
3076 }
3077
3078
3079 *rule_nr = 80;
b30ab791 3080 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363 3081 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3082 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363
PR
3083 if (self == peer)
3084 return 2;
3085 }
3086
3087 *rule_nr = 90;
b30ab791
AG
3088 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3089 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
b411b363
PR
3090 if (self == peer && self != ((u64)0))
3091 return 100;
3092
3093 *rule_nr = 100;
3094 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
b30ab791 3095 self = device->ldev->md.uuid[i] & ~((u64)1);
b411b363 3096 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
b30ab791 3097 peer = device->p_uuid[j] & ~((u64)1);
b411b363
PR
3098 if (self == peer)
3099 return -100;
3100 }
3101 }
3102
3103 return -1000;
3104}
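
/* Reader's note (inferred from the callers below, not original commentary):
 * drbd_uuid_compare() encodes its verdict in "hg": 0 = in sync,
 * 1/-1 = bitmap-based resync with this/the peer node as sync source,
 * 2/-2 = full sync, 100/-100 = split brain, -1000 = unrelated data, and
 * values below -1000 mean "both sides need at least protocol (-hg - 1000)". */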

/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
				  "automatically solved. Sync from %s node\n",
				  pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
					  " UUIDs were ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
				  "Sync from %s node\n",
				  (hg < 0) ? "peer" : "this");
	}
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data "
				  "assumption\n");
		}
	}

	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
				  drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				  abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}

f179d76d 3265static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3266{
3267 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3268 if (peer == ASB_DISCARD_REMOTE)
3269 return ASB_DISCARD_LOCAL;
b411b363
PR
3270
3271 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3272 if (peer == ASB_DISCARD_LOCAL)
3273 return ASB_DISCARD_REMOTE;
b411b363
PR
3274
3275 /* everything else is valid if they are equal on both sides. */
f179d76d 3276 return peer;
b411b363
PR
3277}
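
/* Example of the mirroring above: if the peer's config says
 * ASB_DISCARD_REMOTE, it intends to discard *our* data, which from our
 * point of view is ASB_DISCARD_LOCAL (and vice versa); every other
 * after-split-brain policy must simply be configured identically. */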

static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(peer_integrity_tfm)) {
			/* crypto_alloc_hash() signals failure via ERR_PTR(),
			 * never NULL; clear the pointer so the common error
			 * path below may free it unconditionally. */
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok. */
static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
			 alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}

static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}
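
/* Note on the helper above: it drains the unwanted payload in chunks of at
 * most DRBD_SOCKET_BUFFER_SIZE bytes so the TCP stream stays aligned on
 * packet boundaries; a receive error is propagated as-is, and a premature
 * EOF (size still nonzero after a zero-length read) is reported as -EIO. */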

/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	return ignore_remaining_packet(connection, pi);
}

static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					 "peer wants %u, accepting only up to %u bytes\n",
					 data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed");
					/* no put_ldev() here: the disconnect path
					 * below drops the ldev reference, since
					 * new_disk_conf is non-NULL at this point */
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t d;
	if (a == 0 || b == 0)
		return;
	d = (a > b) ? (a - b) : (b - a);
	if (d > (a>>3) || d > (b>>3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
			  (unsigned long long)a, (unsigned long long)b);
}
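
/* Worked example for the 12.5% rule above: a = 1000, b = 800 gives d = 200,
 * which exceeds b>>3 = 100, so a warning is logged; a = 1000, b = 950
 * (d = 50) stays below both a>>3 and b>>3 and remains silent. */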

static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
		    drbd_get_capacity(device->this_bdev) &&
		    device->state.disk >= D_OUTDATED &&
		    device->state.conn < C_CONNECTED) {
			drbd_err(device, "The peer's disk size is too small!\n");
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			synchronize_rcu();
			kfree(old_disk_conf);

			/* report the size actually stored (p_usize), not the
			 * previous local value */
			drbd_info(device, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)p_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_max_bio_size(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_max_bio_size(device, device->ldev);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peer's backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 */
		drbd_reconsider_max_bio_size(device, NULL);
		drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
	}

	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}

static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_uuids *p = pi->data;
	u64 *p_uuid;
	int i, updated_uuids = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		drbd_err(device, "kmalloc of p_uuid failed\n");
		/* returning "false" (0) here would be mistaken for success
		 * by the caller; report the allocation failure instead */
		return -ENOMEM;
	}

	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
		p_uuid[i] = be64_to_cpu(p->uuid[i]);

	kfree(device->p_uuid);
	device->p_uuid = p_uuid;

	if (device->state.conn < C_CONNECTED &&
	    device->state.disk < D_INCONSISTENT &&
	    device->state.role == R_PRIMARY &&
	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
		    (unsigned long long)device->ed_uuid);
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (get_ldev(device)) {
		int skip_initial_sync =
			device->state.conn == C_CONNECTED &&
			peer_device->connection->agreed_pro_version >= 90 &&
			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
			(p_uuid[UI_FLAGS] & 8);
		if (skip_initial_sync) {
			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
					"clear_n_write from receive_uuids",
					BM_LOCKED_TEST_ALLOWED);
			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
			_drbd_uuid_set(device, UI_BITMAP, 0);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			drbd_md_sync(device);
			updated_uuids = 1;
		}
		put_ldev(device);
	} else if (device->state.disk < D_INCONSISTENT &&
		   device->state.role == R_PRIMARY) {
		/* I am a diskless primary, the peer just created a new current UUID
		   for me. */
		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
	}

	/* Before we test for the disk state, we should wait until a possibly
	   ongoing cluster wide state change is finished. That is important if
	   we are primary and are detaching from our disk. We need to see the
	   new disk state... */
	mutex_lock(device->state_mutex);
	mutex_unlock(device->state_mutex);
	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);

	if (updated_uuids)
		drbd_print_uuids(device, "receiver updated UUIDs to");

	return 0;
}

/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps: The state as seen by the peer.
 */
static union drbd_state convert_state(union drbd_state ps)
{
	union drbd_state ms;

	static enum drbd_conns c_tab[] = {
		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
		[C_CONNECTED] = C_CONNECTED,

		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
		[C_VERIFY_S] = C_VERIFY_T,
		[C_MASK] = C_MASK,
	};

	ms.i = ps.i;

	ms.conn = c_tab[ps.conn];
	ms.peer = ps.role;
	ms.role = ps.peer;
	ms.pdsk = ps.disk;
	ms.disk = ps.pdsk;
	ms.peer_isp = (ps.aftr_isp | ps.user_isp);

	return ms;
}
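
/* Mirroring example for convert_state(): a peer reporting
 * { role=Primary, peer=Secondary, disk=UpToDate, pdsk=Inconsistent }
 * is recorded locally as { peer=Primary, role=Secondary, pdsk=UpToDate,
 * disk=Inconsistent }, and its C_STARTING_SYNC_S maps to our
 * C_STARTING_SYNC_T via c_tab. */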

static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	drbd_md_sync(device);

	return 0;
}

static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}

static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporary network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}

static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

/**
 * receive_bitmap_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
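
/* Sizing note for receive_bitmap_plain(): each P_BITMAP packet carries at
 * most (DRBD_SOCKET_BUFFER_SIZE - header) / sizeof(long) bitmap words, and
 * its payload must be exactly num_words * sizeof(long) bytes for the words
 * that remain in this transfer; any other size is a protocol error (-EIO). */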

static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}
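
/* Layout of the "encoding" byte, as decoded by the three helpers above:
 * bit 7 = initial value of the RLE toggle (whether the first run is "set"),
 * bits 6..4 = number of pad bits at the end of the bitstream,
 * bits 3..0 = bitmap encoding; only RLE_VLI_Bits is accepted further down. */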

/**
 * recv_bm_rle_bits
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		 struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl - 1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				 have, bits, look_ahead,
				 (unsigned int)(bs.cur.b - p->code),
				 (unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
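
/* Sketch of the stream format consumed above, as implied by the decoder:
 * the payload is a bitstream of VLI-encoded run lengths, with runs
 * alternating between "bits clear" and "bits set" starting at the polarity
 * from dcbp_get_start().  E.g. start=0 with run lengths 1000, 4, 96 leaves
 * bits 0..999 untouched, sets bits 1000..1003, then skips the next 96. */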

/**
 * decode_bitmap_c
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests. */

	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total can not be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
		  "total %u; compression: %u.%u%%\n",
		  direction,
		  c->bytes[1], c->packets[1],
		  c->bytes[0], c->packets[0],
		  total, r/10, r % 10);
}
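
/* Worked example for the ratio math above: plain = 40000 bytes and
 * total = 10000 compressed bytes give r = 1000 * 10000 / 40000 = 250,
 * then r = 1000 - 250 = 750, printed as "compression: 75.0%". */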

/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
			  drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		  pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
			 drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}
4551
02918be2
PR
4552struct data_cmd {
4553 int expect_payload;
4554 size_t pkt_size;
bde89a9e 4555 int (*fn)(struct drbd_connection *, struct packet_info *);
02918be2
PR
4556};
4557
4558static struct data_cmd drbd_cmd_handler[] = {
4559 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4560 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4561 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4562 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4563 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4564 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4565 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
02918be2
PR
4566 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4567 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4568 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4569 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
02918be2
PR
4570 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4571 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4572 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4573 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4574 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4575 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4576 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4577 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4578 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4579 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
73a01a18 4580 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4a76b161 4581 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4582 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
a0fb3c47 4583 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
b411b363
PR
4584};
4585
bde89a9e 4586static void drbdd(struct drbd_connection *connection)
b411b363 4587{
77351055 4588 struct packet_info pi;
02918be2 4589 size_t shs; /* sub header size */
82bc0194 4590 int err;
b411b363 4591
bde89a9e 4592 while (get_t_state(&connection->receiver) == RUNNING) {
deebe195 4593 struct data_cmd *cmd;
b411b363 4594
bde89a9e
AG
4595 drbd_thread_current_set_cpu(&connection->receiver);
4596 if (drbd_recv_header(connection, &pi))
02918be2 4597 goto err_out;
b411b363 4598
deebe195 4599 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4600 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
1ec861eb 4601 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
2fcb8f30 4602 cmdname(pi.cmd), pi.cmd);
02918be2 4603 goto err_out;
0b33a916 4604 }
b411b363 4605
e658983a
AG
4606 shs = cmd->pkt_size;
4607 if (pi.size > shs && !cmd->expect_payload) {
1ec861eb 4608 drbd_err(connection, "No payload expected %s l:%d\n",
2fcb8f30 4609 cmdname(pi.cmd), pi.size);
02918be2 4610 goto err_out;
b411b363 4611 }
b411b363 4612
c13f7e1a 4613 if (shs) {
bde89a9e 4614 err = drbd_recv_all_warn(connection, pi.data, shs);
a5c31904 4615 if (err)
c13f7e1a 4616 goto err_out;
e2857216 4617 pi.size -= shs;
c13f7e1a
LE
4618 }
4619
bde89a9e 4620 err = cmd->fn(connection, &pi);
4a76b161 4621 if (err) {
1ec861eb 4622 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
9f5bdc33 4623 cmdname(pi.cmd), err, pi.size);
02918be2 4624 goto err_out;
b411b363
PR
4625 }
4626 }
82bc0194 4627 return;
b411b363 4628
82bc0194 4629 err_out:
bde89a9e 4630 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4631}
4632
bde89a9e 4633static void conn_disconnect(struct drbd_connection *connection)
b411b363 4634{
c06ece6b 4635 struct drbd_peer_device *peer_device;
bbeb641c 4636 enum drbd_conns oc;
376694a0 4637 int vnr;
b411b363 4638
bde89a9e 4639 if (connection->cstate == C_STANDALONE)
b411b363 4640 return;
b411b363 4641
545752d5
LE
4642 /* We are about to start the cleanup after connection loss.
4643 * Make sure drbd_make_request knows about that.
4644 * Usually we should be in some network failure state already,
4645 * but just in case we are not, we fix it up here.
4646 */
bde89a9e 4647 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
545752d5 4648
b411b363 4649 /* The asender does not clean up anything. It must not interfere, either. */
bde89a9e
AG
4650 drbd_thread_stop(&connection->asender);
4651 drbd_free_sock(connection);
360cc740 4652
c141ebda 4653 rcu_read_lock();
c06ece6b
AG
4654 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4655 struct drbd_device *device = peer_device->device;
b30ab791 4656 kref_get(&device->kref);
c141ebda 4657 rcu_read_unlock();
69a22773 4658 drbd_disconnected(peer_device);
c06ece6b 4659 kref_put(&device->kref, drbd_destroy_device);
c141ebda
PR
4660 rcu_read_lock();
4661 }
4662 rcu_read_unlock();
4663
bde89a9e 4664 if (!list_empty(&connection->current_epoch->list))
1ec861eb 4665 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
12038a3a 4666 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
bde89a9e
AG
4667 atomic_set(&connection->current_epoch->epoch_size, 0);
4668 connection->send.seen_any_write_yet = false;
12038a3a 4669
1ec861eb 4670 drbd_info(connection, "Connection closed\n");
360cc740 4671
bde89a9e
AG
4672 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4673 conn_try_outdate_peer_async(connection);
cb703454 4674
0500813f 4675 spin_lock_irq(&connection->resource->req_lock);
bde89a9e 4676 oc = connection->cstate;
bbeb641c 4677 if (oc >= C_UNCONNECTED)
bde89a9e 4678 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4679
0500813f 4680 spin_unlock_irq(&connection->resource->req_lock);
360cc740 4681
f3dfa40a 4682 if (oc == C_DISCONNECTING)
bde89a9e 4683 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4684}
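
/*
 * Editor's note: a standalone sketch (hypothetical name, assuming the
 * surrounding drbd types) of the iteration idiom conn_disconnect()
 * uses above and connection_finish_peer_reqs() uses again below: the
 * peer_devices IDR is walked under rcu_read_lock(), and before the
 * RCU lock is dropped for blocking work, a kref pins the device so it
 * cannot go away in the unlocked region.
 */
static void __maybe_unused for_each_device_sketch(struct drbd_connection *connection,
						  void (*work)(struct drbd_device *))
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);	/* pin across the unlocked region */
		rcu_read_unlock();
		work(device);			/* may sleep */
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}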
4685
69a22773 4686static int drbd_disconnected(struct drbd_peer_device *peer_device)
360cc740 4687{
69a22773 4688 struct drbd_device *device = peer_device->device;
360cc740 4689 unsigned int i;
b411b363 4690
85719573 4691 /* wait for current activity to cease. */
0500813f 4692 spin_lock_irq(&device->resource->req_lock);
b30ab791
AG
4693 _drbd_wait_ee_list_empty(device, &device->active_ee);
4694 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4695 _drbd_wait_ee_list_empty(device, &device->read_ee);
0500813f 4696 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
4697
4698 /* We do not have data structures that would allow us to
4699 * get the rs_pending_cnt down to 0 again.
4700 * * On C_SYNC_TARGET we do not have any data structures describing
4701 * the pending RSDataRequest's we have sent.
4702 * * On C_SYNC_SOURCE there is no data structure that tracks
4703 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4704 * And no, it is not the sum of the reference counts in the
4705 * resync_LRU. The resync_LRU tracks the whole operation including
4706 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4707 * on the fly. */
b30ab791
AG
4708 drbd_rs_cancel_all(device);
4709 device->rs_total = 0;
4710 device->rs_failed = 0;
4711 atomic_set(&device->rs_pending_cnt, 0);
4712 wake_up(&device->misc_wait);
b411b363 4713
b30ab791
AG
4714 del_timer_sync(&device->resync_timer);
4715 resync_timer_fn((unsigned long)device);
b411b363 4716
b411b363
PR
4717 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4718 * w_make_resync_request etc. which may still be on the worker queue
4719 * to be "canceled" */
b5043c5e 4720 drbd_flush_workqueue(&peer_device->connection->sender_work);
b411b363 4721
b30ab791 4722 drbd_finish_peer_reqs(device);
b411b363 4723
d10b4ea3
PR
4724 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4725 might have queued new work. The flush before drbd_finish_peer_reqs() is
4726 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
b5043c5e 4727 drbd_flush_workqueue(&peer_device->connection->sender_work);
d10b4ea3 4728
08332d73
LE
4729 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4730 * again via drbd_try_clear_on_disk_bm(). */
b30ab791 4731 drbd_rs_cancel_all(device);
b411b363 4732
b30ab791
AG
4733 kfree(device->p_uuid);
4734 device->p_uuid = NULL;
b411b363 4735
b30ab791 4736 if (!drbd_suspended(device))
69a22773 4737 tl_clear(peer_device->connection);
b411b363 4738
b30ab791 4739 drbd_md_sync(device);
b411b363 4740
20ceb2b2
LE
4741 /* serialize with bitmap writeout triggered by the state change,
4742 * if any. */
b30ab791 4743 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
20ceb2b2 4744
b411b363
PR
4745 /* tcp_close and release of sendpage pages can be deferred. I don't
4746 * want to use SO_LINGER, because apparently it can be deferred for
4747 * more than 20 seconds (longest time I checked).
4748 *
4749 * Actually we don't care for exactly when the network stack does its
4750 * put_page(), but release our reference on these pages right here.
4751 */
b30ab791 4752 i = drbd_free_peer_reqs(device, &device->net_ee);
b411b363 4753 if (i)
d0180171 4754 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
b30ab791 4755 i = atomic_read(&device->pp_in_use_by_net);
435f0740 4756 if (i)
d0180171 4757 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
b30ab791 4758 i = atomic_read(&device->pp_in_use);
b411b363 4759 if (i)
d0180171 4760 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
b411b363 4761
0b0ba1ef
AG
4762 D_ASSERT(device, list_empty(&device->read_ee));
4763 D_ASSERT(device, list_empty(&device->active_ee));
4764 D_ASSERT(device, list_empty(&device->sync_ee));
4765 D_ASSERT(device, list_empty(&device->done_ee));
b411b363 4766
360cc740 4767 return 0;
b411b363
PR
4768}
4769
4770/*
4771 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4772 * we can agree on is stored in agreed_pro_version.
4773 *
4774 * feature flags and the reserved array should be enough room for future
4775 * enhancements of the handshake protocol, and possible plugins...
4776 *
4777 * for now, they are expected to be zero, but ignored.
4778 */
bde89a9e 4779static int drbd_send_features(struct drbd_connection *connection)
b411b363 4780{
9f5bdc33
AG
4781 struct drbd_socket *sock;
4782 struct p_connection_features *p;
b411b363 4783
bde89a9e
AG
4784 sock = &connection->data;
4785 p = conn_prepare_command(connection, sock);
9f5bdc33 4786 if (!p)
e8d17b01 4787 return -EIO;
b411b363
PR
4788 memset(p, 0, sizeof(*p));
4789 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4790 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
20c68fde 4791 p->feature_flags = cpu_to_be32(PRO_FEATURES);
bde89a9e 4792 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4793}
4794
4795/*
4796 * return values:
4797 * 1 yes, we have a valid connection
4798 * 0 oops, did not work out, please try again
4799 * -1 peer talks different language,
4800 * no point in trying again, please go standalone.
4801 */
bde89a9e 4802static int drbd_do_features(struct drbd_connection *connection)
b411b363 4803{
bde89a9e 4804 /* ASSERT current == connection->receiver ... */
e658983a
AG
4805 struct p_connection_features *p;
4806 const int expect = sizeof(struct p_connection_features);
77351055 4807 struct packet_info pi;
a5c31904 4808 int err;
b411b363 4809
bde89a9e 4810 err = drbd_send_features(connection);
e8d17b01 4811 if (err)
b411b363
PR
4812 return 0;
4813
bde89a9e 4814 err = drbd_recv_header(connection, &pi);
69bc7bc3 4815 if (err)
b411b363
PR
4816 return 0;
4817
6038178e 4818 if (pi.cmd != P_CONNECTION_FEATURES) {
1ec861eb 4819 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4820 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4821 return -1;
4822 }
4823
77351055 4824 if (pi.size != expect) {
1ec861eb 4825 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4826 expect, pi.size);
b411b363
PR
4827 return -1;
4828 }
4829
e658983a 4830 p = pi.data;
bde89a9e 4831 err = drbd_recv_all_warn(connection, p, expect);
a5c31904 4832 if (err)
b411b363 4833 return 0;
b411b363 4834
b411b363
PR
4835 p->protocol_min = be32_to_cpu(p->protocol_min);
4836 p->protocol_max = be32_to_cpu(p->protocol_max);
4837 if (p->protocol_max == 0)
4838 p->protocol_max = p->protocol_min;
4839
4840 if (PRO_VERSION_MAX < p->protocol_min ||
4841 PRO_VERSION_MIN > p->protocol_max)
4842 goto incompat;
4843
bde89a9e 4844 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
20c68fde 4845 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
b411b363 4846
1ec861eb 4847 drbd_info(connection, "Handshake successful: "
bde89a9e 4848 "Agreed network protocol version %d\n", connection->agreed_pro_version);
b411b363 4849
20c68fde
LE
4850 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4851 connection->agreed_features & FF_TRIM ? " " : " not ");
4852
b411b363
PR
4853 return 1;
4854
4855 incompat:
1ec861eb 4856 drbd_err(connection, "incompatible DRBD dialects: "
b411b363
PR
4857 "I support %d-%d, peer supports %d-%d\n",
4858 PRO_VERSION_MIN, PRO_VERSION_MAX,
4859 p->protocol_min, p->protocol_max);
4860 return -1;
4861}
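
/*
 * Editor's note: a minimal sketch (not drbd code) of the agreement
 * rule implemented above.  Two version windows are compatible iff
 * they overlap; both sides then independently settle on the same
 * min(my_max, peer_max), and optional features are the intersection
 * of the advertised feature masks.
 */
static int __maybe_unused agree_version_sketch(int my_min, int my_max,
					       int peer_min, int peer_max,
					       u32 my_features, u32 peer_features,
					       int *agreed, u32 *features)
{
	if (my_max < peer_min || my_min > peer_max)
		return -1;			/* incompatible dialects */
	*agreed = min(my_max, peer_max);	/* same result on both peers */
	*features = my_features & peer_features;
	return 1;
}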
4862
4863#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
bde89a9e 4864static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4865{
1ec861eb
AG
4866 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4867 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4868 return -1;
b411b363
PR
4869}
4870#else
4871#define CHALLENGE_LEN 64
b10d96cb
JT
4872
4873/* Return value:
4874 1 - auth succeeded,
4875 0 - failed, try again (network error),
4876 -1 - auth failed, don't try again.
4877*/
4878
bde89a9e 4879static int drbd_do_auth(struct drbd_connection *connection)
b411b363 4880{
9f5bdc33 4881 struct drbd_socket *sock;
b411b363
PR
4882 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4883 struct scatterlist sg;
4884 char *response = NULL;
4885 char *right_response = NULL;
4886 char *peers_ch = NULL;
44ed167d
PR
4887 unsigned int key_len;
4888 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4889 unsigned int resp_size;
4890 struct hash_desc desc;
77351055 4891 struct packet_info pi;
44ed167d 4892 struct net_conf *nc;
69bc7bc3 4893 int err, rv;
b411b363 4894
9f5bdc33 4895 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
b411b363 4896
44ed167d 4897 rcu_read_lock();
bde89a9e 4898 nc = rcu_dereference(connection->net_conf);
44ed167d
PR
4899 key_len = strlen(nc->shared_secret);
4900 memcpy(secret, nc->shared_secret, key_len);
4901 rcu_read_unlock();
4902
bde89a9e 4903 desc.tfm = connection->cram_hmac_tfm;
b411b363
PR
4904 desc.flags = 0;
4905
bde89a9e 4906 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4907 if (rv) {
1ec861eb 4908 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4909 rv = -1;
b411b363
PR
4910 goto fail;
4911 }
4912
4913 get_random_bytes(my_challenge, CHALLENGE_LEN);
4914
bde89a9e
AG
4915 sock = &connection->data;
4916 if (!conn_prepare_command(connection, sock)) {
9f5bdc33
AG
4917 rv = 0;
4918 goto fail;
4919 }
bde89a9e 4920 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4921 my_challenge, CHALLENGE_LEN);
b411b363
PR
4922 if (!rv)
4923 goto fail;
4924
bde89a9e 4925 err = drbd_recv_header(connection, &pi);
69bc7bc3
AG
4926 if (err) {
4927 rv = 0;
b411b363 4928 goto fail;
69bc7bc3 4929 }
b411b363 4930
77351055 4931 if (pi.cmd != P_AUTH_CHALLENGE) {
1ec861eb 4932 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4933 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4934 rv = 0;
4935 goto fail;
4936 }
4937
77351055 4938 if (pi.size > CHALLENGE_LEN * 2) {
1ec861eb 4939 drbd_err(connection, "expected AuthChallenge payload too big.\n");
b10d96cb 4940 rv = -1;
b411b363
PR
4941 goto fail;
4942 }
4943
67cca286
PR
4944 if (pi.size < CHALLENGE_LEN) {
4945 drbd_err(connection, "AuthChallenge payload too small.\n");
4946 rv = -1;
4947 goto fail;
4948 }
4949
77351055 4950 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4951 if (peers_ch == NULL) {
1ec861eb 4952 drbd_err(connection, "kmalloc of peers_ch failed\n");
b10d96cb 4953 rv = -1;
b411b363
PR
4954 goto fail;
4955 }
4956
bde89a9e 4957 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
a5c31904 4958 if (err) {
b411b363
PR
4959 rv = 0;
4960 goto fail;
4961 }
4962
67cca286
PR
4963 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4964 drbd_err(connection, "Peer presented the same challenge!\n");
4965 rv = -1;
4966 goto fail;
4967 }
4968
bde89a9e 4969 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
b411b363
PR
4970 response = kmalloc(resp_size, GFP_NOIO);
4971 if (response == NULL) {
1ec861eb 4972 drbd_err(connection, "kmalloc of response failed\n");
b10d96cb 4973 rv = -1;
b411b363
PR
4974 goto fail;
4975 }
4976
4977 sg_init_table(&sg, 1);
77351055 4978 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4979
4980 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4981 if (rv) {
1ec861eb 4982 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4983 rv = -1;
b411b363
PR
4984 goto fail;
4985 }
4986
bde89a9e 4987 if (!conn_prepare_command(connection, sock)) {
9f5bdc33 4988 rv = 0;
b411b363 4989 goto fail;
9f5bdc33 4990 }
bde89a9e 4991 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4992 response, resp_size);
b411b363
PR
4993 if (!rv)
4994 goto fail;
4995
bde89a9e 4996 err = drbd_recv_header(connection, &pi);
69bc7bc3 4997 if (err) {
b411b363
PR
4998 rv = 0;
4999 goto fail;
5000 }
5001
77351055 5002 if (pi.cmd != P_AUTH_RESPONSE) {
1ec861eb 5003 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 5004 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5005 rv = 0;
5006 goto fail;
5007 }
5008
77351055 5009 if (pi.size != resp_size) {
1ec861eb 5010 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
b411b363
PR
5011 rv = 0;
5012 goto fail;
5013 }
b411b363 5014
bde89a9e 5015 err = drbd_recv_all_warn(connection, response , resp_size);
a5c31904 5016 if (err) {
b411b363
PR
5017 rv = 0;
5018 goto fail;
5019 }
5020
5021 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 5022 if (right_response == NULL) {
1ec861eb 5023 drbd_err(connection, "kmalloc of right_response failed\n");
b10d96cb 5024 rv = -1;
b411b363
PR
5025 goto fail;
5026 }
5027
5028 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
5029
5030 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
5031 if (rv) {
1ec861eb 5032 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 5033 rv = -1;
b411b363
PR
5034 goto fail;
5035 }
5036
5037 rv = !memcmp(response, right_response, resp_size);
5038
5039 if (rv)
1ec861eb 5040 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
44ed167d 5041 resp_size);
b10d96cb
JT
5042 else
5043 rv = -1;
b411b363
PR
5044
5045 fail:
5046 kfree(peers_ch);
5047 kfree(response);
5048 kfree(right_response);
5049
5050 return rv;
5051}
5052#endif
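
/*
 * Editor's note: condensed pseudo-code summary of the symmetric
 * CRAM-style handshake implemented above (hmac() stands in for
 * crypto_hash_digest() keyed with the shared secret; error handling
 * omitted):
 *
 *	send(P_AUTH_CHALLENGE, my_challenge);
 *	recv(P_AUTH_CHALLENGE, peers_ch);
 *	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN))
 *		fail;				// reflected challenge
 *	send(P_AUTH_RESPONSE, hmac(peers_ch));
 *	recv(P_AUTH_RESPONSE, response);
 *	ok = !memcmp(response, hmac(my_challenge), resp_size);
 *
 * Both sides run the same sequence, so each proves knowledge of the
 * secret without ever sending it.
 */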
5053
8fe60551 5054int drbd_receiver(struct drbd_thread *thi)
b411b363 5055{
bde89a9e 5056 struct drbd_connection *connection = thi->connection;
b411b363
PR
5057 int h;
5058
1ec861eb 5059 drbd_info(connection, "receiver (re)started\n");
b411b363
PR
5060
5061 do {
bde89a9e 5062 h = conn_connect(connection);
b411b363 5063 if (h == 0) {
bde89a9e 5064 conn_disconnect(connection);
20ee6390 5065 schedule_timeout_interruptible(HZ);
b411b363
PR
5066 }
5067 if (h == -1) {
1ec861eb 5068 drbd_warn(connection, "Discarding network configuration.\n");
bde89a9e 5069 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
5070 }
5071 } while (h == 0);
5072
91fd4dad 5073 if (h > 0)
bde89a9e 5074 drbdd(connection);
b411b363 5075
bde89a9e 5076 conn_disconnect(connection);
b411b363 5077
1ec861eb 5078 drbd_info(connection, "receiver terminated\n");
b411b363
PR
5079 return 0;
5080}
5081
5082/* ********* acknowledge sender ******** */
5083
bde89a9e 5084static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5085{
e658983a 5086 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
5087 int retcode = be32_to_cpu(p->retcode);
5088
5089 if (retcode >= SS_SUCCESS) {
bde89a9e 5090 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
e4f78ede 5091 } else {
bde89a9e 5092 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
1ec861eb 5093 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
e4f78ede
PR
5094 drbd_set_st_err_str(retcode), retcode);
5095 }
bde89a9e 5096 wake_up(&connection->ping_wait);
e4f78ede 5097
2735a594 5098 return 0;
e4f78ede 5099}
b411b363 5100
bde89a9e 5101static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5102{
9f4fe9ad 5103 struct drbd_peer_device *peer_device;
b30ab791 5104 struct drbd_device *device;
e658983a 5105 struct p_req_state_reply *p = pi->data;
b411b363
PR
5106 int retcode = be32_to_cpu(p->retcode);
5107
9f4fe9ad
AG
5108 peer_device = conn_peer_device(connection, pi->vnr);
5109 if (!peer_device)
2735a594 5110 return -EIO;
9f4fe9ad 5111 device = peer_device->device;
1952e916 5112
bde89a9e 5113 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
0b0ba1ef 5114 D_ASSERT(device, connection->agreed_pro_version < 100);
bde89a9e 5115 return got_conn_RqSReply(connection, pi);
4d0fc3fd
PR
5116 }
5117
b411b363 5118 if (retcode >= SS_SUCCESS) {
b30ab791 5119 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
b411b363 5120 } else {
b30ab791 5121 set_bit(CL_ST_CHG_FAIL, &device->flags);
d0180171 5122 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
e4f78ede 5123 drbd_set_st_err_str(retcode), retcode);
b411b363 5124 }
b30ab791 5125 wake_up(&device->state_wait);
b411b363 5126
2735a594 5127 return 0;
b411b363
PR
5128}
5129
bde89a9e 5130static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5131{
bde89a9e 5132 return drbd_send_ping_ack(connection);
b411b363
PR
5133
5134}
5135
bde89a9e 5136static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363
PR
5137{
5138 /* restore idle timeout */
bde89a9e
AG
5139 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5140 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5141 wake_up(&connection->ping_wait);
b411b363 5142
2735a594 5143 return 0;
b411b363
PR
5144}
5145
bde89a9e 5146static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5147{
9f4fe9ad 5148 struct drbd_peer_device *peer_device;
b30ab791 5149 struct drbd_device *device;
e658983a 5150 struct p_block_ack *p = pi->data;
b411b363
PR
5151 sector_t sector = be64_to_cpu(p->sector);
5152 int blksize = be32_to_cpu(p->blksize);
5153
9f4fe9ad
AG
5154 peer_device = conn_peer_device(connection, pi->vnr);
5155 if (!peer_device)
2735a594 5156 return -EIO;
9f4fe9ad 5157 device = peer_device->device;
1952e916 5158
9f4fe9ad 5159 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
b411b363 5160
69a22773 5161 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5162
b30ab791
AG
5163 if (get_ldev(device)) {
5164 drbd_rs_complete_io(device, sector);
5165 drbd_set_in_sync(device, sector, blksize);
1d53f09e 5166 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
b30ab791
AG
5167 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5168 put_ldev(device);
1d53f09e 5169 }
b30ab791
AG
5170 dec_rs_pending(device);
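	/* blksize is in bytes; >> 9 converts to the 512-byte sectors rs_sect_in counts */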
5171 atomic_add(blksize >> 9, &device->rs_sect_in);
b411b363 5172
2735a594 5173 return 0;
b411b363
PR
5174}
5175
bc9c5c41 5176static int
b30ab791 5177validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
bc9c5c41
AG
5178 struct rb_root *root, const char *func,
5179 enum drbd_req_event what, bool missing_ok)
b411b363
PR
5180{
5181 struct drbd_request *req;
5182 struct bio_and_error m;
5183
0500813f 5184 spin_lock_irq(&device->resource->req_lock);
b30ab791 5185 req = find_request(device, root, id, sector, missing_ok, func);
b411b363 5186 if (unlikely(!req)) {
0500813f 5187 spin_unlock_irq(&device->resource->req_lock);
85997675 5188 return -EIO;
b411b363
PR
5189 }
5190 __req_mod(req, what, &m);
0500813f 5191 spin_unlock_irq(&device->resource->req_lock);
b411b363
PR
5192
5193 if (m.bio)
b30ab791 5194 complete_master_bio(device, &m);
85997675 5195 return 0;
b411b363
PR
5196}
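
/*
 * Editor's note: the helper above deliberately defers
 * complete_master_bio() until after req_lock is dropped; __req_mod()
 * only records the resulting bio_and_error, so the upper layer's bio
 * is completed without a spinlock held.
 */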
5197
bde89a9e 5198static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5199{
9f4fe9ad 5200 struct drbd_peer_device *peer_device;
b30ab791 5201 struct drbd_device *device;
e658983a 5202 struct p_block_ack *p = pi->data;
b411b363
PR
5203 sector_t sector = be64_to_cpu(p->sector);
5204 int blksize = be32_to_cpu(p->blksize);
5205 enum drbd_req_event what;
5206
9f4fe9ad
AG
5207 peer_device = conn_peer_device(connection, pi->vnr);
5208 if (!peer_device)
2735a594 5209 return -EIO;
9f4fe9ad 5210 device = peer_device->device;
1952e916 5211
69a22773 5212 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5213
579b57ed 5214 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5215 drbd_set_in_sync(device, sector, blksize);
5216 dec_rs_pending(device);
2735a594 5217 return 0;
b411b363 5218 }
e05e1e59 5219 switch (pi->cmd) {
b411b363 5220 case P_RS_WRITE_ACK:
8554df1c 5221 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
5222 break;
5223 case P_WRITE_ACK:
8554df1c 5224 what = WRITE_ACKED_BY_PEER;
b411b363
PR
5225 break;
5226 case P_RECV_ACK:
8554df1c 5227 what = RECV_ACKED_BY_PEER;
b411b363 5228 break;
d4dabbe2
LE
5229 case P_SUPERSEDED:
5230 what = CONFLICT_RESOLVED;
b411b363 5231 break;
7be8da07 5232 case P_RETRY_WRITE:
7be8da07 5233 what = POSTPONE_WRITE;
b411b363
PR
5234 break;
5235 default:
2735a594 5236 BUG();
b411b363
PR
5237 }
5238
b30ab791
AG
5239 return validate_req_change_req_state(device, p->block_id, sector,
5240 &device->write_requests, __func__,
2735a594 5241 what, false);
b411b363
PR
5242}
5243
bde89a9e 5244static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5245{
9f4fe9ad 5246 struct drbd_peer_device *peer_device;
b30ab791 5247 struct drbd_device *device;
e658983a 5248 struct p_block_ack *p = pi->data;
b411b363 5249 sector_t sector = be64_to_cpu(p->sector);
2deb8336 5250 int size = be32_to_cpu(p->blksize);
85997675 5251 int err;
b411b363 5252
9f4fe9ad
AG
5253 peer_device = conn_peer_device(connection, pi->vnr);
5254 if (!peer_device)
2735a594 5255 return -EIO;
9f4fe9ad 5256 device = peer_device->device;
b411b363 5257
69a22773 5258 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5259
579b57ed 5260 if (p->block_id == ID_SYNCER) {
b30ab791
AG
5261 dec_rs_pending(device);
5262 drbd_rs_failed_io(device, sector, size);
2735a594 5263 return 0;
b411b363 5264 }
2deb8336 5265
b30ab791
AG
5266 err = validate_req_change_req_state(device, p->block_id, sector,
5267 &device->write_requests, __func__,
303d1448 5268 NEG_ACKED, true);
85997675 5269 if (err) {
c3afd8f5
AG
5270 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5271 The master bio might already be completed, therefore the
5272 request is no longer in the collision hash. */
5273 /* In Protocol B we might already have got a P_RECV_ACK
5274 but then get a P_NEG_ACK afterwards. */
b30ab791 5275 drbd_set_out_of_sync(device, sector, size);
2deb8336 5276 }
2735a594 5277 return 0;
b411b363
PR
5278}
5279
bde89a9e 5280static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5281{
9f4fe9ad 5282 struct drbd_peer_device *peer_device;
b30ab791 5283 struct drbd_device *device;
e658983a 5284 struct p_block_ack *p = pi->data;
b411b363
PR
5285 sector_t sector = be64_to_cpu(p->sector);
5286
9f4fe9ad
AG
5287 peer_device = conn_peer_device(connection, pi->vnr);
5288 if (!peer_device)
2735a594 5289 return -EIO;
9f4fe9ad 5290 device = peer_device->device;
1952e916 5291
69a22773 5292 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
7be8da07 5293
d0180171 5294 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5295 (unsigned long long)sector, be32_to_cpu(p->blksize));
5296
b30ab791
AG
5297 return validate_req_change_req_state(device, p->block_id, sector,
5298 &device->read_requests, __func__,
2735a594 5299 NEG_ACKED, false);
b411b363
PR
5300}
5301
bde89a9e 5302static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5303{
9f4fe9ad 5304 struct drbd_peer_device *peer_device;
b30ab791 5305 struct drbd_device *device;
b411b363
PR
5306 sector_t sector;
5307 int size;
e658983a 5308 struct p_block_ack *p = pi->data;
1952e916 5309
9f4fe9ad
AG
5310 peer_device = conn_peer_device(connection, pi->vnr);
5311 if (!peer_device)
2735a594 5312 return -EIO;
9f4fe9ad 5313 device = peer_device->device;
b411b363
PR
5314
5315 sector = be64_to_cpu(p->sector);
5316 size = be32_to_cpu(p->blksize);
b411b363 5317
69a22773 5318 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363 5319
b30ab791 5320 dec_rs_pending(device);
b411b363 5321
b30ab791
AG
5322 if (get_ldev_if_state(device, D_FAILED)) {
5323 drbd_rs_complete_io(device, sector);
e05e1e59 5324 switch (pi->cmd) {
d612d309 5325 case P_NEG_RS_DREPLY:
b30ab791 5326 drbd_rs_failed_io(device, sector, size);
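			/* fall through */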
d612d309
PR
5327 case P_RS_CANCEL:
5328 break;
5329 default:
2735a594 5330 BUG();
d612d309 5331 }
b30ab791 5332 put_ldev(device);
b411b363
PR
5333 }
5334
2735a594 5335 return 0;
b411b363
PR
5336}
5337
bde89a9e 5338static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5339{
e658983a 5340 struct p_barrier_ack *p = pi->data;
c06ece6b 5341 struct drbd_peer_device *peer_device;
9ed57dcb 5342 int vnr;
1952e916 5343
bde89a9e 5344 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
b411b363 5345
9ed57dcb 5346 rcu_read_lock();
c06ece6b
AG
5347 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5348 struct drbd_device *device = peer_device->device;
5349
b30ab791
AG
5350 if (device->state.conn == C_AHEAD &&
5351 atomic_read(&device->ap_in_flight) == 0 &&
5352 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5353 device->start_resync_timer.expires = jiffies + HZ;
5354 add_timer(&device->start_resync_timer);
9ed57dcb 5355 }
c4752ef1 5356 }
9ed57dcb 5357 rcu_read_unlock();
c4752ef1 5358
2735a594 5359 return 0;
b411b363
PR
5360}
5361
bde89a9e 5362static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
b411b363 5363{
9f4fe9ad 5364 struct drbd_peer_device *peer_device;
b30ab791 5365 struct drbd_device *device;
e658983a 5366 struct p_block_ack *p = pi->data;
84b8c06b 5367 struct drbd_device_work *dw;
b411b363
PR
5368 sector_t sector;
5369 int size;
5370
9f4fe9ad
AG
5371 peer_device = conn_peer_device(connection, pi->vnr);
5372 if (!peer_device)
2735a594 5373 return -EIO;
9f4fe9ad 5374 device = peer_device->device;
1952e916 5375
b411b363
PR
5376 sector = be64_to_cpu(p->sector);
5377 size = be32_to_cpu(p->blksize);
5378
69a22773 5379 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
b411b363
PR
5380
5381 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
b30ab791 5382 drbd_ov_out_of_sync_found(device, sector, size);
b411b363 5383 else
b30ab791 5384 ov_out_of_sync_print(device);
b411b363 5385
b30ab791 5386 if (!get_ldev(device))
2735a594 5387 return 0;
1d53f09e 5388
b30ab791
AG
5389 drbd_rs_complete_io(device, sector);
5390 dec_rs_pending(device);
b411b363 5391
b30ab791 5392 --device->ov_left;
ea5442af
LE
5393
5394 /* let's advance progress step marks only for every other megabyte */
b30ab791
AG
5395 if ((device->ov_left & 0x200) == 0x200)
5396 drbd_advance_rs_marks(device, device->ov_left);
ea5442af 5397
b30ab791 5398 if (device->ov_left == 0) {
84b8c06b
AG
5399 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5400 if (dw) {
5401 dw->w.cb = w_ov_finished;
5402 dw->device = device;
5403 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
b411b363 5404 } else {
84b8c06b 5405 drbd_err(device, "kmalloc(dw) failed.");
b30ab791
AG
5406 ov_out_of_sync_print(device);
5407 drbd_resync_finished(device);
b411b363
PR
5408 }
5409 }
b30ab791 5410 put_ldev(device);
2735a594 5411 return 0;
b411b363
PR
5412}
5413
bde89a9e 5414static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
0ced55a3 5415{
2735a594 5416 return 0;
b411b363
PR
5417}
5418
bde89a9e 5419static int connection_finish_peer_reqs(struct drbd_connection *connection)
0ced55a3 5420{
c06ece6b 5421 struct drbd_peer_device *peer_device;
c141ebda 5422 int vnr, not_empty = 0;
32862ec7
PR
5423
5424 do {
bde89a9e 5425 clear_bit(SIGNAL_ASENDER, &connection->flags);
32862ec7 5426 flush_signals(current);
c141ebda
PR
5427
5428 rcu_read_lock();
c06ece6b
AG
5429 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5430 struct drbd_device *device = peer_device->device;
b30ab791 5431 kref_get(&device->kref);
c141ebda 5432 rcu_read_unlock();
b30ab791 5433 if (drbd_finish_peer_reqs(device)) {
05a10ec7 5434 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5435 return 1;
d3fcb490 5436 }
05a10ec7 5437 kref_put(&device->kref, drbd_destroy_device);
c141ebda 5438 rcu_read_lock();
082a3439 5439 }
bde89a9e 5440 set_bit(SIGNAL_ASENDER, &connection->flags);
082a3439 5441
0500813f 5442 spin_lock_irq(&connection->resource->req_lock);
c06ece6b
AG
5443 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5444 struct drbd_device *device = peer_device->device;
b30ab791 5445 not_empty = !list_empty(&device->done_ee);
082a3439
PR
5446 if (not_empty)
5447 break;
5448 }
0500813f 5449 spin_unlock_irq(&connection->resource->req_lock);
c141ebda 5450 rcu_read_unlock();
32862ec7
PR
5451 } while (not_empty);
5452
5453 return 0;
0ced55a3
PR
5454}
5455
b411b363
PR
5456struct asender_cmd {
5457 size_t pkt_size;
bde89a9e 5458 int (*fn)(struct drbd_connection *connection, struct packet_info *);
b411b363
PR
5459};
5460
7201b972 5461static struct asender_cmd asender_tbl[] = {
e658983a
AG
5462 [P_PING] = { 0, got_Ping },
5463 [P_PING_ACK] = { 0, got_PingAck },
b411b363
PR
5464 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5465 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5466 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
d4dabbe2 5467 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
b411b363
PR
5468 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5469 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
1952e916 5470 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
b411b363
PR
5471 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5472 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5473 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5474 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
02918be2 5475 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
1952e916
AG
5476 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5477 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5478 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972 5479};
b411b363
PR
5480
5481int drbd_asender(struct drbd_thread *thi)
5482{
bde89a9e 5483 struct drbd_connection *connection = thi->connection;
b411b363 5484 struct asender_cmd *cmd = NULL;
77351055 5485 struct packet_info pi;
257d0af6 5486 int rv;
bde89a9e 5487 void *buf = connection->meta.rbuf;
b411b363 5488 int received = 0;
bde89a9e 5489 unsigned int header_size = drbd_header_size(connection);
52b061a4 5490 int expect = header_size;
44ed167d
PR
5491 bool ping_timeout_active = false;
5492 struct net_conf *nc;
bb77d34e 5493 int ping_timeo, tcp_cork, ping_int;
3990e04d 5494 struct sched_param param = { .sched_priority = 2 };
b411b363 5495
3990e04d
PR
5496 rv = sched_setscheduler(current, SCHED_RR, &param);
5497 if (rv < 0)
1ec861eb 5498 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
b411b363 5499
e77a0a5c 5500 while (get_t_state(thi) == RUNNING) {
80822284 5501 drbd_thread_current_set_cpu(thi);
b411b363 5502
44ed167d 5503 rcu_read_lock();
bde89a9e 5504 nc = rcu_dereference(connection->net_conf);
44ed167d 5505 ping_timeo = nc->ping_timeo;
bb77d34e 5506 tcp_cork = nc->tcp_cork;
44ed167d
PR
5507 ping_int = nc->ping_int;
5508 rcu_read_unlock();
5509
bde89a9e
AG
5510 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5511 if (drbd_send_ping(connection)) {
1ec861eb 5512 drbd_err(connection, "drbd_send_ping has failed\n");
b411b363 5513 goto reconnect;
841ce241 5514 }
bde89a9e 5515 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
44ed167d 5516 ping_timeout_active = true;
b411b363
PR
5517 }
5518
32862ec7
PR
5519 /* TODO: conditionally cork; it may hurt latency if we cork without
5520 much to send */
bb77d34e 5521 if (tcp_cork)
bde89a9e
AG
5522 drbd_tcp_cork(connection->meta.socket);
5523 if (connection_finish_peer_reqs(connection)) {
1ec861eb 5524 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
32862ec7 5525 goto reconnect;
b411b363
PR
5526 }
5527 /* but unconditionally uncork unless disabled */
bb77d34e 5528 if (tcp_cork)
bde89a9e 5529 drbd_tcp_uncork(connection->meta.socket);
b411b363
PR
5530
5531 /* short circuit, recv_msg would return EINTR anyways. */
5532 if (signal_pending(current))
5533 continue;
5534
bde89a9e
AG
5535 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5536 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363
PR
5537
5538 flush_signals(current);
5539
5540 /* Note:
5541 * -EINTR (on meta) we got a signal
5542 * -EAGAIN (on meta) rcvtimeo expired
5543 * -ECONNRESET other side closed the connection
5544 * -ERESTARTSYS (on data) we got a signal
5545 * rv < 0 other than above: unexpected error!
5546 * rv == expected: full header or command
5547 * rv < expected: "woken" by signal during receive
5548 * rv == 0 : "connection shut down by peer"
5549 */
5550 if (likely(rv > 0)) {
5551 received += rv;
5552 buf += rv;
5553 } else if (rv == 0) {
bde89a9e 5554 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
b66623e3
PR
5555 long t;
5556 rcu_read_lock();
bde89a9e 5557 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
b66623e3
PR
5558 rcu_read_unlock();
5559
bde89a9e
AG
5560 t = wait_event_timeout(connection->ping_wait,
5561 connection->cstate < C_WF_REPORT_PARAMS,
b66623e3 5562 t);
599377ac
PR
5563 if (t)
5564 break;
5565 }
1ec861eb 5566 drbd_err(connection, "meta connection shut down by peer.\n");
b411b363
PR
5567 goto reconnect;
5568 } else if (rv == -EAGAIN) {
cb6518cb
LE
5569 /* If the data socket received something meanwhile,
5570 * that is good enough: peer is still alive. */
bde89a9e
AG
5571 if (time_after(connection->last_received,
5572 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5573 continue;
f36af18c 5574 if (ping_timeout_active) {
1ec861eb 5575 drbd_err(connection, "PingAck did not arrive in time.\n");
b411b363
PR
5576 goto reconnect;
5577 }
bde89a9e 5578 set_bit(SEND_PING, &connection->flags);
b411b363
PR
5579 continue;
5580 } else if (rv == -EINTR) {
5581 continue;
5582 } else {
1ec861eb 5583 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5584 goto reconnect;
5585 }
5586
5587 if (received == expect && cmd == NULL) {
bde89a9e 5588 if (decode_header(connection, connection->meta.rbuf, &pi))
b411b363 5589 goto reconnect;
7201b972 5590 cmd = &asender_tbl[pi.cmd];
1952e916 5591 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
1ec861eb 5592 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
2fcb8f30 5593 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5594 goto disconnect;
5595 }
e658983a 5596 expect = header_size + cmd->pkt_size;
52b061a4 5597 if (pi.size != expect - header_size) {
1ec861eb 5598 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5599 pi.cmd, pi.size);
b411b363 5600 goto reconnect;
257d0af6 5601 }
b411b363
PR
5602 }
5603 if (received == expect) {
2735a594 5604 bool err;
a4fbda8e 5605
bde89a9e 5606 err = cmd->fn(connection, &pi);
2735a594 5607 if (err) {
1ec861eb 5608 drbd_err(connection, "%pf failed\n", cmd->fn);
b411b363 5609 goto reconnect;
1952e916 5610 }
b411b363 5611
bde89a9e 5612 connection->last_received = jiffies;
f36af18c 5613
44ed167d
PR
5614 if (cmd == &asender_tbl[P_PING_ACK]) {
5615 /* restore idle timeout */
bde89a9e 5616 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
44ed167d
PR
5617 ping_timeout_active = false;
5618 }
f36af18c 5619
bde89a9e 5620 buf = connection->meta.rbuf;
b411b363 5621 received = 0;
52b061a4 5622 expect = header_size;
b411b363
PR
5623 cmd = NULL;
5624 }
5625 }
5626
5627 if (0) {
5628reconnect:
bde89a9e
AG
5629 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5630 conn_md_sync(connection);
b411b363
PR
5631 }
5632 if (0) {
5633disconnect:
bde89a9e 5634 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5635 }
bde89a9e 5636 clear_bit(SIGNAL_ASENDER, &connection->flags);
b411b363 5637
1ec861eb 5638 drbd_info(connection, "asender terminated\n");
b411b363
PR
5639
5640 return 0;
5641}
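
/*
 * Editor's note: a standalone sketch (hypothetical names) of the
 * accumulation pattern in the asender loop above: keep reading until
 * 'expect' bytes have arrived, where expect is first the header size
 * and, once the header is decoded, grows to header plus the
 * per-command payload size from asender_tbl.
 */
static int __maybe_unused recv_exact_sketch(int (*recv_short)(void *buf, int len),
					    void *buf, int expect)
{
	int received = 0;

	while (received < expect) {
		int rv = recv_short((char *)buf + received, expect - received);

		if (rv == 0)
			return -ECONNRESET;	/* peer closed the connection */
		if (rv < 0)
			return rv;		/* -EINTR, -EAGAIN, ... */
		received += rv;
	}
	return 0;
}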