libceph: use a do..while loop in con_work()
[deliverable/linux.git] / drivers / block / rbd.c
CommitLineData
602adf40
YS
1/*
2 rbd.c -- Export ceph rados objects as a Linux block device
3
4
5 based on drivers/block/osdblk.c:
6
7 Copyright 2009 Red Hat, Inc.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING. If not, write to
20 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21
22
23
dfc5606d 24 For usage instructions, please refer to:
602adf40 25
dfc5606d 26 Documentation/ABI/testing/sysfs-bus-rbd
602adf40
YS
27
28 */
29
30#include <linux/ceph/libceph.h>
31#include <linux/ceph/osd_client.h>
32#include <linux/ceph/mon_client.h>
33#include <linux/ceph/decode.h>
59c2be1e 34#include <linux/parser.h>
602adf40
YS
35
36#include <linux/kernel.h>
37#include <linux/device.h>
38#include <linux/module.h>
39#include <linux/fs.h>
40#include <linux/blkdev.h>
41
42#include "rbd_types.h"
43
aafb230e
AE
44#define RBD_DEBUG /* Activate rbd_assert() calls */
45
593a9e7b
AE
46/*
47 * The basic unit of block I/O is a sector. It is interpreted in a
48 * number of contexts in Linux (blk, bio, genhd), but the default is
49 * universally 512 bytes. These symbols are just slightly more
50 * meaningful than the bare numbers they represent.
51 */
52#define SECTOR_SHIFT 9
53#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
54
2647ba38 55/* It might be useful to have these defined elsewhere */
df111be6 56
2647ba38
AE
57#define U8_MAX ((u8) (~0U))
58#define U16_MAX ((u16) (~0U))
59#define U32_MAX ((u32) (~0U))
60#define U64_MAX ((u64) (~0ULL))
df111be6 61
f0f8cef5
AE
62#define RBD_DRV_NAME "rbd"
63#define RBD_DRV_NAME_LONG "rbd (rados block device)"
602adf40
YS
64
65#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
66
d4b125e9
AE
67#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
68#define RBD_MAX_SNAP_NAME_LEN \
69 (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
70
35d489f9 71#define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */
602adf40
YS
72
73#define RBD_SNAP_HEAD_NAME "-"
74
9e15b77d
AE
75/* This allows a single page to hold an image name sent by OSD */
76#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
1e130199 77#define RBD_IMAGE_ID_LEN_MAX 64
9e15b77d 78
1e130199 79#define RBD_OBJ_PREFIX_LEN_MAX 64
589d30e0 80
d889140c
AE
81/* Feature bits */
82
83#define RBD_FEATURE_LAYERING 1
84
85/* Features supported by this (client software) implementation. */
86
87#define RBD_FEATURES_ALL (0)
88
81a89793
AE
89/*
90 * An RBD device name will be "rbd#", where the "rbd" comes from
91 * RBD_DRV_NAME above, and # is a unique integer identifier.
92 * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
93 * enough to hold all possible device names.
94 */
602adf40 95#define DEV_NAME_LEN 32
81a89793 96#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
602adf40
YS
97
98/*
99 * block device image metadata (in-memory version)
100 */
101struct rbd_image_header {
f84344f3 102 /* These four fields never change for a given rbd image */
849b4260 103 char *object_prefix;
34b13184 104 u64 features;
602adf40
YS
105 __u8 obj_order;
106 __u8 crypt_type;
107 __u8 comp_type;
602adf40 108
f84344f3
AE
109 /* The remaining fields need to be updated occasionally */
110 u64 image_size;
111 struct ceph_snap_context *snapc;
602adf40
YS
112 char *snap_names;
113 u64 *snap_sizes;
59c2be1e
YS
114
115 u64 obj_version;
116};
117
0d7dbfce
AE
118/*
119 * An rbd image specification.
120 *
121 * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
c66c6e0c
AE
122 * identify an image. Each rbd_dev structure includes a pointer to
123 * an rbd_spec structure that encapsulates this identity.
124 *
125 * Each of the id's in an rbd_spec has an associated name. For a
126 * user-mapped image, the names are supplied and the id's associated
127 * with them are looked up. For a layered image, a parent image is
128 * defined by the tuple, and the names are looked up.
129 *
130 * An rbd_dev structure contains a parent_spec pointer which is
131 * non-null if the image it represents is a child in a layered
132 * image. This pointer will refer to the rbd_spec structure used
133 * by the parent rbd_dev for its own identity (i.e., the structure
134 * is shared between the parent and child).
135 *
136 * Since these structures are populated once, during the discovery
137 * phase of image construction, they are effectively immutable so
138 * we make no effort to synchronize access to them.
139 *
140 * Note that code herein does not assume the image name is known (it
141 * could be a null pointer).
0d7dbfce
AE
142 */
143struct rbd_spec {
144 u64 pool_id;
145 char *pool_name;
146
147 char *image_id;
0d7dbfce 148 char *image_name;
0d7dbfce
AE
149
150 u64 snap_id;
151 char *snap_name;
152
153 struct kref kref;
154};
155
602adf40 156/*
f0f8cef5 157 * an instance of the client. multiple devices may share an rbd client.
602adf40
YS
158 */
159struct rbd_client {
160 struct ceph_client *client;
161 struct kref kref;
162 struct list_head node;
163};
164
bf0d5f50
AE
165struct rbd_img_request;
166typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
167
168#define BAD_WHICH U32_MAX /* Good which or bad which, which? */
169
170struct rbd_obj_request;
171typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
172
9969ebc5
AE
173enum obj_request_type {
174 OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
175};
bf0d5f50
AE
176
177struct rbd_obj_request {
178 const char *object_name;
179 u64 offset; /* object start byte */
180 u64 length; /* bytes from offset */
181
182 struct rbd_img_request *img_request;
183 struct list_head links; /* img_request->obj_requests */
184 u32 which; /* posn image request list */
185
186 enum obj_request_type type;
788e2df3
AE
187 union {
188 struct bio *bio_list;
189 struct {
190 struct page **pages;
191 u32 page_count;
192 };
193 };
bf0d5f50
AE
194
195 struct ceph_osd_request *osd_req;
196
197 u64 xferred; /* bytes transferred */
198 u64 version;
199 s32 result;
200 atomic_t done;
201
202 rbd_obj_callback_t callback;
788e2df3 203 struct completion completion;
bf0d5f50
AE
204
205 struct kref kref;
206};
207
208struct rbd_img_request {
209 struct request *rq;
210 struct rbd_device *rbd_dev;
211 u64 offset; /* starting image byte offset */
212 u64 length; /* byte count from offset */
213 bool write_request; /* false for read */
214 union {
215 struct ceph_snap_context *snapc; /* for writes */
216 u64 snap_id; /* for reads */
217 };
218 spinlock_t completion_lock;/* protects next_completion */
219 u32 next_completion;
220 rbd_img_callback_t callback;
221
222 u32 obj_request_count;
223 struct list_head obj_requests; /* rbd_obj_request structs */
224
225 struct kref kref;
226};
227
228#define for_each_obj_request(ireq, oreq) \
ef06f4d3 229 list_for_each_entry(oreq, &(ireq)->obj_requests, links)
bf0d5f50 230#define for_each_obj_request_from(ireq, oreq) \
ef06f4d3 231 list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
bf0d5f50 232#define for_each_obj_request_safe(ireq, oreq, n) \
ef06f4d3 233 list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
bf0d5f50 234
dfc5606d
YS
235struct rbd_snap {
236 struct device dev;
237 const char *name;
3591538f 238 u64 size;
dfc5606d
YS
239 struct list_head node;
240 u64 id;
34b13184 241 u64 features;
dfc5606d
YS
242};
243
f84344f3 244struct rbd_mapping {
99c1f08f 245 u64 size;
34b13184 246 u64 features;
f84344f3
AE
247 bool read_only;
248};
249
602adf40
YS
250/*
251 * a single device
252 */
253struct rbd_device {
de71a297 254 int dev_id; /* blkdev unique id */
602adf40
YS
255
256 int major; /* blkdev assigned major */
257 struct gendisk *disk; /* blkdev's gendisk and rq */
602adf40 258
a30b71b9 259 u32 image_format; /* Either 1 or 2 */
602adf40
YS
260 struct rbd_client *rbd_client;
261
262 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
263
b82d167b 264 spinlock_t lock; /* queue, flags, open_count */
602adf40
YS
265
266 struct rbd_image_header header;
b82d167b 267 unsigned long flags; /* possibly lock protected */
0d7dbfce 268 struct rbd_spec *spec;
602adf40 269
0d7dbfce 270 char *header_name;
971f839a 271
0903e875
AE
272 struct ceph_file_layout layout;
273
59c2be1e 274 struct ceph_osd_event *watch_event;
975241af 275 struct rbd_obj_request *watch_request;
59c2be1e 276
86b00e0d
AE
277 struct rbd_spec *parent_spec;
278 u64 parent_overlap;
279
c666601a
JD
280 /* protects updating the header */
281 struct rw_semaphore header_rwsem;
f84344f3
AE
282
283 struct rbd_mapping mapping;
602adf40
YS
284
285 struct list_head node;
dfc5606d
YS
286
287 /* list of snapshots */
288 struct list_head snaps;
289
290 /* sysfs related */
291 struct device dev;
b82d167b 292 unsigned long open_count; /* protected by lock */
dfc5606d
YS
293};
294
b82d167b
AE
295/*
296 * Flag bits for rbd_dev->flags. If atomicity is required,
297 * rbd_dev->lock is used to protect access.
298 *
299 * Currently, only the "removing" flag (which is coupled with the
300 * "open_count" field) requires atomic access.
301 */
6d292906
AE
302enum rbd_dev_flags {
303 RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
b82d167b 304 RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
6d292906
AE
305};
306
602adf40 307static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
e124a82f 308
602adf40 309static LIST_HEAD(rbd_dev_list); /* devices */
e124a82f
AE
310static DEFINE_SPINLOCK(rbd_dev_list_lock);
311
432b8587
AE
312static LIST_HEAD(rbd_client_list); /* clients */
313static DEFINE_SPINLOCK(rbd_client_list_lock);
602adf40 314
304f6808
AE
315static int rbd_dev_snaps_update(struct rbd_device *rbd_dev);
316static int rbd_dev_snaps_register(struct rbd_device *rbd_dev);
317
dfc5606d 318static void rbd_dev_release(struct device *dev);
41f38c2b 319static void rbd_remove_snap_dev(struct rbd_snap *snap);
dfc5606d 320
f0f8cef5
AE
321static ssize_t rbd_add(struct bus_type *bus, const char *buf,
322 size_t count);
323static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
324 size_t count);
325
326static struct bus_attribute rbd_bus_attrs[] = {
327 __ATTR(add, S_IWUSR, NULL, rbd_add),
328 __ATTR(remove, S_IWUSR, NULL, rbd_remove),
329 __ATTR_NULL
330};
331
332static struct bus_type rbd_bus_type = {
333 .name = "rbd",
334 .bus_attrs = rbd_bus_attrs,
335};
336
337static void rbd_root_dev_release(struct device *dev)
338{
339}
340
341static struct device rbd_root_dev = {
342 .init_name = "rbd",
343 .release = rbd_root_dev_release,
344};
345
06ecc6cb
AE
346static __printf(2, 3)
347void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
348{
349 struct va_format vaf;
350 va_list args;
351
352 va_start(args, fmt);
353 vaf.fmt = fmt;
354 vaf.va = &args;
355
356 if (!rbd_dev)
357 printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
358 else if (rbd_dev->disk)
359 printk(KERN_WARNING "%s: %s: %pV\n",
360 RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
361 else if (rbd_dev->spec && rbd_dev->spec->image_name)
362 printk(KERN_WARNING "%s: image %s: %pV\n",
363 RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
364 else if (rbd_dev->spec && rbd_dev->spec->image_id)
365 printk(KERN_WARNING "%s: id %s: %pV\n",
366 RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
367 else /* punt */
368 printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
369 RBD_DRV_NAME, rbd_dev, &vaf);
370 va_end(args);
371}
372
#ifdef RBD_DEBUG
/*
 * Assert that the given expression is true; log the failing expression
 * and its location, then BUG(), when it is not.
 *
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement: the original bare "if (...) { ... }" form silently
 * changed control flow when used in an unbraced if/else (dangling
 * else hazard) and required callers to omit the trailing semicolon
 * in some positions.
 */
#define rbd_assert(expr)						\
	do {								\
		if (unlikely(!(expr))) {				\
			printk(KERN_ERR "\nAssertion failure in %s() "	\
						"at line %d:\n\n"	\
					"\trbd_assert(%s);\n\n",	\
					__func__, __LINE__, #expr);	\
			BUG();						\
		}							\
	} while (0)
#else /* !RBD_DEBUG */
#  define rbd_assert(expr)	((void) 0)
#endif /* !RBD_DEBUG */
dfc5606d 385
117973fb
AE
386static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
387static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
59c2be1e 388
602adf40
YS
389static int rbd_open(struct block_device *bdev, fmode_t mode)
390{
f0f8cef5 391 struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
b82d167b 392 bool removing = false;
602adf40 393
f84344f3 394 if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
602adf40
YS
395 return -EROFS;
396
a14ea269 397 spin_lock_irq(&rbd_dev->lock);
b82d167b
AE
398 if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
399 removing = true;
400 else
401 rbd_dev->open_count++;
a14ea269 402 spin_unlock_irq(&rbd_dev->lock);
b82d167b
AE
403 if (removing)
404 return -ENOENT;
405
42382b70 406 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
c3e946ce 407 (void) get_device(&rbd_dev->dev);
f84344f3 408 set_device_ro(bdev, rbd_dev->mapping.read_only);
42382b70 409 mutex_unlock(&ctl_mutex);
340c7a2b 410
602adf40
YS
411 return 0;
412}
413
dfc5606d
YS
414static int rbd_release(struct gendisk *disk, fmode_t mode)
415{
416 struct rbd_device *rbd_dev = disk->private_data;
b82d167b
AE
417 unsigned long open_count_before;
418
a14ea269 419 spin_lock_irq(&rbd_dev->lock);
b82d167b 420 open_count_before = rbd_dev->open_count--;
a14ea269 421 spin_unlock_irq(&rbd_dev->lock);
b82d167b 422 rbd_assert(open_count_before > 0);
dfc5606d 423
42382b70 424 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
c3e946ce 425 put_device(&rbd_dev->dev);
42382b70 426 mutex_unlock(&ctl_mutex);
dfc5606d
YS
427
428 return 0;
429}
430
602adf40
YS
431static const struct block_device_operations rbd_bd_ops = {
432 .owner = THIS_MODULE,
433 .open = rbd_open,
dfc5606d 434 .release = rbd_release,
602adf40
YS
435};
436
437/*
438 * Initialize an rbd client instance.
43ae4701 439 * We own *ceph_opts.
602adf40 440 */
f8c38929 441static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
602adf40
YS
442{
443 struct rbd_client *rbdc;
444 int ret = -ENOMEM;
445
37206ee5 446 dout("%s:\n", __func__);
602adf40
YS
447 rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
448 if (!rbdc)
449 goto out_opt;
450
451 kref_init(&rbdc->kref);
452 INIT_LIST_HEAD(&rbdc->node);
453
bc534d86
AE
454 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
455
43ae4701 456 rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
602adf40 457 if (IS_ERR(rbdc->client))
bc534d86 458 goto out_mutex;
43ae4701 459 ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
602adf40
YS
460
461 ret = ceph_open_session(rbdc->client);
462 if (ret < 0)
463 goto out_err;
464
432b8587 465 spin_lock(&rbd_client_list_lock);
602adf40 466 list_add_tail(&rbdc->node, &rbd_client_list);
432b8587 467 spin_unlock(&rbd_client_list_lock);
602adf40 468
bc534d86 469 mutex_unlock(&ctl_mutex);
37206ee5 470 dout("%s: rbdc %p\n", __func__, rbdc);
bc534d86 471
602adf40
YS
472 return rbdc;
473
474out_err:
475 ceph_destroy_client(rbdc->client);
bc534d86
AE
476out_mutex:
477 mutex_unlock(&ctl_mutex);
602adf40
YS
478 kfree(rbdc);
479out_opt:
43ae4701
AE
480 if (ceph_opts)
481 ceph_destroy_options(ceph_opts);
37206ee5
AE
482 dout("%s: error %d\n", __func__, ret);
483
28f259b7 484 return ERR_PTR(ret);
602adf40
YS
485}
486
487/*
1f7ba331
AE
488 * Find a ceph client with specific addr and configuration. If
489 * found, bump its reference count.
602adf40 490 */
1f7ba331 491static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
602adf40
YS
492{
493 struct rbd_client *client_node;
1f7ba331 494 bool found = false;
602adf40 495
43ae4701 496 if (ceph_opts->flags & CEPH_OPT_NOSHARE)
602adf40
YS
497 return NULL;
498
1f7ba331
AE
499 spin_lock(&rbd_client_list_lock);
500 list_for_each_entry(client_node, &rbd_client_list, node) {
501 if (!ceph_compare_options(ceph_opts, client_node->client)) {
502 kref_get(&client_node->kref);
503 found = true;
504 break;
505 }
506 }
507 spin_unlock(&rbd_client_list_lock);
508
509 return found ? client_node : NULL;
602adf40
YS
510}
511
59c2be1e
YS
512/*
513 * mount options
514 */
515enum {
59c2be1e
YS
516 Opt_last_int,
517 /* int args above */
518 Opt_last_string,
519 /* string args above */
cc0538b6
AE
520 Opt_read_only,
521 Opt_read_write,
522 /* Boolean args above */
523 Opt_last_bool,
59c2be1e
YS
524};
525
43ae4701 526static match_table_t rbd_opts_tokens = {
59c2be1e
YS
527 /* int args above */
528 /* string args above */
be466c1c 529 {Opt_read_only, "read_only"},
cc0538b6
AE
530 {Opt_read_only, "ro"}, /* Alternate spelling */
531 {Opt_read_write, "read_write"},
532 {Opt_read_write, "rw"}, /* Alternate spelling */
533 /* Boolean args above */
59c2be1e
YS
534 {-1, NULL}
535};
536
98571b5a
AE
537struct rbd_options {
538 bool read_only;
539};
540
541#define RBD_READ_ONLY_DEFAULT false
542
59c2be1e
YS
543static int parse_rbd_opts_token(char *c, void *private)
544{
43ae4701 545 struct rbd_options *rbd_opts = private;
59c2be1e
YS
546 substring_t argstr[MAX_OPT_ARGS];
547 int token, intval, ret;
548
43ae4701 549 token = match_token(c, rbd_opts_tokens, argstr);
59c2be1e
YS
550 if (token < 0)
551 return -EINVAL;
552
553 if (token < Opt_last_int) {
554 ret = match_int(&argstr[0], &intval);
555 if (ret < 0) {
556 pr_err("bad mount option arg (not int) "
557 "at '%s'\n", c);
558 return ret;
559 }
560 dout("got int token %d val %d\n", token, intval);
561 } else if (token > Opt_last_int && token < Opt_last_string) {
562 dout("got string token %d val %s\n", token,
563 argstr[0].from);
cc0538b6
AE
564 } else if (token > Opt_last_string && token < Opt_last_bool) {
565 dout("got Boolean token %d\n", token);
59c2be1e
YS
566 } else {
567 dout("got token %d\n", token);
568 }
569
570 switch (token) {
cc0538b6
AE
571 case Opt_read_only:
572 rbd_opts->read_only = true;
573 break;
574 case Opt_read_write:
575 rbd_opts->read_only = false;
576 break;
59c2be1e 577 default:
aafb230e
AE
578 rbd_assert(false);
579 break;
59c2be1e
YS
580 }
581 return 0;
582}
583
/*
 * Get a ceph client with specific addr and configuration, if one does
 * not exist create it.  Consumes ceph_opts either way (an existing
 * client keeps its own copy, so ours is destroyed).
 */
static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
{
	struct rbd_client *rbdc;

	rbdc = rbd_client_find(ceph_opts);
	if (rbdc)	/* using an existing client */
		ceph_destroy_options(ceph_opts);
	else
		rbdc = rbd_client_create(ceph_opts);

	return rbdc;
}
600
601/*
602 * Destroy ceph client
d23a4b3f 603 *
432b8587 604 * Caller must hold rbd_client_list_lock.
602adf40
YS
605 */
606static void rbd_client_release(struct kref *kref)
607{
608 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
609
37206ee5 610 dout("%s: rbdc %p\n", __func__, rbdc);
cd9d9f5d 611 spin_lock(&rbd_client_list_lock);
602adf40 612 list_del(&rbdc->node);
cd9d9f5d 613 spin_unlock(&rbd_client_list_lock);
602adf40
YS
614
615 ceph_destroy_client(rbdc->client);
616 kfree(rbdc);
617}
618
619/*
620 * Drop reference to ceph client node. If it's not referenced anymore, release
621 * it.
622 */
9d3997fd 623static void rbd_put_client(struct rbd_client *rbdc)
602adf40 624{
c53d5893
AE
625 if (rbdc)
626 kref_put(&rbdc->kref, rbd_client_release);
602adf40
YS
627}
628
a30b71b9
AE
629static bool rbd_image_format_valid(u32 image_format)
630{
631 return image_format == 1 || image_format == 2;
632}
633
8e94af8e
AE
634static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
635{
103a150f
AE
636 size_t size;
637 u32 snap_count;
638
639 /* The header has to start with the magic rbd header text */
640 if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
641 return false;
642
db2388b6
AE
643 /* The bio layer requires at least sector-sized I/O */
644
645 if (ondisk->options.order < SECTOR_SHIFT)
646 return false;
647
648 /* If we use u64 in a few spots we may be able to loosen this */
649
650 if (ondisk->options.order > 8 * sizeof (int) - 1)
651 return false;
652
103a150f
AE
653 /*
654 * The size of a snapshot header has to fit in a size_t, and
655 * that limits the number of snapshots.
656 */
657 snap_count = le32_to_cpu(ondisk->snap_count);
658 size = SIZE_MAX - sizeof (struct ceph_snap_context);
659 if (snap_count > size / sizeof (__le64))
660 return false;
661
662 /*
663 * Not only that, but the size of the entire the snapshot
664 * header must also be representable in a size_t.
665 */
666 size -= snap_count * sizeof (__le64);
667 if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
668 return false;
669
670 return true;
8e94af8e
AE
671}
672
602adf40
YS
673/*
674 * Create a new header structure, translate header format from the on-disk
675 * header.
676 */
677static int rbd_header_from_disk(struct rbd_image_header *header,
4156d998 678 struct rbd_image_header_ondisk *ondisk)
602adf40 679{
ccece235 680 u32 snap_count;
58c17b0e 681 size_t len;
d2bb24e5 682 size_t size;
621901d6 683 u32 i;
602adf40 684
6a52325f
AE
685 memset(header, 0, sizeof (*header));
686
103a150f
AE
687 snap_count = le32_to_cpu(ondisk->snap_count);
688
58c17b0e
AE
689 len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix));
690 header->object_prefix = kmalloc(len + 1, GFP_KERNEL);
6a52325f 691 if (!header->object_prefix)
602adf40 692 return -ENOMEM;
58c17b0e
AE
693 memcpy(header->object_prefix, ondisk->object_prefix, len);
694 header->object_prefix[len] = '\0';
00f1f36f 695
602adf40 696 if (snap_count) {
f785cc1d
AE
697 u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
698
621901d6
AE
699 /* Save a copy of the snapshot names */
700
f785cc1d
AE
701 if (snap_names_len > (u64) SIZE_MAX)
702 return -EIO;
703 header->snap_names = kmalloc(snap_names_len, GFP_KERNEL);
602adf40 704 if (!header->snap_names)
6a52325f 705 goto out_err;
f785cc1d
AE
706 /*
707 * Note that rbd_dev_v1_header_read() guarantees
708 * the ondisk buffer we're working with has
709 * snap_names_len bytes beyond the end of the
710 * snapshot id array, this memcpy() is safe.
711 */
712 memcpy(header->snap_names, &ondisk->snaps[snap_count],
713 snap_names_len);
6a52325f 714
621901d6
AE
715 /* Record each snapshot's size */
716
d2bb24e5
AE
717 size = snap_count * sizeof (*header->snap_sizes);
718 header->snap_sizes = kmalloc(size, GFP_KERNEL);
602adf40 719 if (!header->snap_sizes)
6a52325f 720 goto out_err;
621901d6
AE
721 for (i = 0; i < snap_count; i++)
722 header->snap_sizes[i] =
723 le64_to_cpu(ondisk->snaps[i].image_size);
602adf40 724 } else {
ccece235 725 WARN_ON(ondisk->snap_names_len);
602adf40
YS
726 header->snap_names = NULL;
727 header->snap_sizes = NULL;
728 }
849b4260 729
34b13184 730 header->features = 0; /* No features support in v1 images */
602adf40
YS
731 header->obj_order = ondisk->options.order;
732 header->crypt_type = ondisk->options.crypt_type;
733 header->comp_type = ondisk->options.comp_type;
6a52325f 734
621901d6
AE
735 /* Allocate and fill in the snapshot context */
736
f84344f3 737 header->image_size = le64_to_cpu(ondisk->image_size);
6a52325f
AE
738 size = sizeof (struct ceph_snap_context);
739 size += snap_count * sizeof (header->snapc->snaps[0]);
740 header->snapc = kzalloc(size, GFP_KERNEL);
741 if (!header->snapc)
742 goto out_err;
602adf40
YS
743
744 atomic_set(&header->snapc->nref, 1);
505cbb9b 745 header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
602adf40 746 header->snapc->num_snaps = snap_count;
621901d6
AE
747 for (i = 0; i < snap_count; i++)
748 header->snapc->snaps[i] =
749 le64_to_cpu(ondisk->snaps[i].id);
602adf40
YS
750
751 return 0;
752
6a52325f 753out_err:
849b4260 754 kfree(header->snap_sizes);
ccece235 755 header->snap_sizes = NULL;
602adf40 756 kfree(header->snap_names);
ccece235 757 header->snap_names = NULL;
6a52325f
AE
758 kfree(header->object_prefix);
759 header->object_prefix = NULL;
ccece235 760
00f1f36f 761 return -ENOMEM;
602adf40
YS
762}
763
9e15b77d
AE
764static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
765{
766 struct rbd_snap *snap;
767
768 if (snap_id == CEPH_NOSNAP)
769 return RBD_SNAP_HEAD_NAME;
770
771 list_for_each_entry(snap, &rbd_dev->snaps, node)
772 if (snap_id == snap->id)
773 return snap->name;
774
775 return NULL;
776}
777
8836b995 778static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name)
602adf40 779{
602adf40 780
e86924a8 781 struct rbd_snap *snap;
602adf40 782
e86924a8
AE
783 list_for_each_entry(snap, &rbd_dev->snaps, node) {
784 if (!strcmp(snap_name, snap->name)) {
0d7dbfce 785 rbd_dev->spec->snap_id = snap->id;
e86924a8 786 rbd_dev->mapping.size = snap->size;
34b13184 787 rbd_dev->mapping.features = snap->features;
602adf40 788
e86924a8 789 return 0;
00f1f36f 790 }
00f1f36f 791 }
e86924a8 792
00f1f36f 793 return -ENOENT;
602adf40
YS
794}
795
819d52bf 796static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
602adf40 797{
78dc447d 798 int ret;
602adf40 799
0d7dbfce 800 if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME,
cc9d734c 801 sizeof (RBD_SNAP_HEAD_NAME))) {
0d7dbfce 802 rbd_dev->spec->snap_id = CEPH_NOSNAP;
99c1f08f 803 rbd_dev->mapping.size = rbd_dev->header.image_size;
34b13184 804 rbd_dev->mapping.features = rbd_dev->header.features;
e86924a8 805 ret = 0;
602adf40 806 } else {
0d7dbfce 807 ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name);
602adf40
YS
808 if (ret < 0)
809 goto done;
f84344f3 810 rbd_dev->mapping.read_only = true;
602adf40 811 }
6d292906
AE
812 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
813
602adf40 814done:
602adf40
YS
815 return ret;
816}
817
818static void rbd_header_free(struct rbd_image_header *header)
819{
849b4260 820 kfree(header->object_prefix);
d78fd7ae 821 header->object_prefix = NULL;
602adf40 822 kfree(header->snap_sizes);
d78fd7ae 823 header->snap_sizes = NULL;
849b4260 824 kfree(header->snap_names);
d78fd7ae 825 header->snap_names = NULL;
d1d25646 826 ceph_put_snap_context(header->snapc);
d78fd7ae 827 header->snapc = NULL;
602adf40
YS
828}
829
98571b5a 830static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
602adf40 831{
65ccfe21
AE
832 char *name;
833 u64 segment;
834 int ret;
602adf40 835
2fd82b9e 836 name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO);
65ccfe21
AE
837 if (!name)
838 return NULL;
839 segment = offset >> rbd_dev->header.obj_order;
2fd82b9e 840 ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx",
65ccfe21 841 rbd_dev->header.object_prefix, segment);
2fd82b9e 842 if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) {
65ccfe21
AE
843 pr_err("error formatting segment name for #%llu (%d)\n",
844 segment, ret);
845 kfree(name);
846 name = NULL;
847 }
602adf40 848
65ccfe21
AE
849 return name;
850}
602adf40 851
65ccfe21
AE
852static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
853{
854 u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
602adf40 855
65ccfe21
AE
856 return offset & (segment_size - 1);
857}
858
859static u64 rbd_segment_length(struct rbd_device *rbd_dev,
860 u64 offset, u64 length)
861{
862 u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
863
864 offset &= segment_size - 1;
865
aafb230e 866 rbd_assert(length <= U64_MAX - offset);
65ccfe21
AE
867 if (offset + length > segment_size)
868 length = segment_size - offset;
869
870 return length;
602adf40
YS
871}
872
029bcbd8
JD
873/*
874 * returns the size of an object in the image
875 */
876static u64 rbd_obj_bytes(struct rbd_image_header *header)
877{
878 return 1 << header->obj_order;
879}
880
602adf40
YS
881/*
882 * bio helpers
883 */
884
885static void bio_chain_put(struct bio *chain)
886{
887 struct bio *tmp;
888
889 while (chain) {
890 tmp = chain;
891 chain = chain->bi_next;
892 bio_put(tmp);
893 }
894}
895
896/*
897 * zeros a bio chain, starting at specific offset
898 */
899static void zero_bio_chain(struct bio *chain, int start_ofs)
900{
901 struct bio_vec *bv;
902 unsigned long flags;
903 void *buf;
904 int i;
905 int pos = 0;
906
907 while (chain) {
908 bio_for_each_segment(bv, chain, i) {
909 if (pos + bv->bv_len > start_ofs) {
910 int remainder = max(start_ofs - pos, 0);
911 buf = bvec_kmap_irq(bv, &flags);
912 memset(buf + remainder, 0,
913 bv->bv_len - remainder);
85b5aaa6 914 bvec_kunmap_irq(buf, &flags);
602adf40
YS
915 }
916 pos += bv->bv_len;
917 }
918
919 chain = chain->bi_next;
920 }
921}
922
923/*
f7760dad
AE
924 * Clone a portion of a bio, starting at the given byte offset
925 * and continuing for the number of bytes indicated.
602adf40 926 */
f7760dad
AE
927static struct bio *bio_clone_range(struct bio *bio_src,
928 unsigned int offset,
929 unsigned int len,
930 gfp_t gfpmask)
602adf40 931{
f7760dad
AE
932 struct bio_vec *bv;
933 unsigned int resid;
934 unsigned short idx;
935 unsigned int voff;
936 unsigned short end_idx;
937 unsigned short vcnt;
938 struct bio *bio;
939
940 /* Handle the easy case for the caller */
941
942 if (!offset && len == bio_src->bi_size)
943 return bio_clone(bio_src, gfpmask);
944
945 if (WARN_ON_ONCE(!len))
946 return NULL;
947 if (WARN_ON_ONCE(len > bio_src->bi_size))
948 return NULL;
949 if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
950 return NULL;
951
952 /* Find first affected segment... */
953
954 resid = offset;
955 __bio_for_each_segment(bv, bio_src, idx, 0) {
956 if (resid < bv->bv_len)
957 break;
958 resid -= bv->bv_len;
602adf40 959 }
f7760dad 960 voff = resid;
602adf40 961
f7760dad 962 /* ...and the last affected segment */
602adf40 963
f7760dad
AE
964 resid += len;
965 __bio_for_each_segment(bv, bio_src, end_idx, idx) {
966 if (resid <= bv->bv_len)
967 break;
968 resid -= bv->bv_len;
969 }
970 vcnt = end_idx - idx + 1;
971
972 /* Build the clone */
973
974 bio = bio_alloc(gfpmask, (unsigned int) vcnt);
975 if (!bio)
976 return NULL; /* ENOMEM */
602adf40 977
f7760dad
AE
978 bio->bi_bdev = bio_src->bi_bdev;
979 bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
980 bio->bi_rw = bio_src->bi_rw;
981 bio->bi_flags |= 1 << BIO_CLONED;
982
983 /*
984 * Copy over our part of the bio_vec, then update the first
985 * and last (or only) entries.
986 */
987 memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
988 vcnt * sizeof (struct bio_vec));
989 bio->bi_io_vec[0].bv_offset += voff;
990 if (vcnt > 1) {
991 bio->bi_io_vec[0].bv_len -= voff;
992 bio->bi_io_vec[vcnt - 1].bv_len = resid;
993 } else {
994 bio->bi_io_vec[0].bv_len = len;
602adf40
YS
995 }
996
f7760dad
AE
997 bio->bi_vcnt = vcnt;
998 bio->bi_size = len;
999 bio->bi_idx = 0;
1000
1001 return bio;
1002}
1003
1004/*
1005 * Clone a portion of a bio chain, starting at the given byte offset
1006 * into the first bio in the source chain and continuing for the
1007 * number of bytes indicated. The result is another bio chain of
1008 * exactly the given length, or a null pointer on error.
1009 *
1010 * The bio_src and offset parameters are both in-out. On entry they
1011 * refer to the first source bio and the offset into that bio where
1012 * the start of data to be cloned is located.
1013 *
1014 * On return, bio_src is updated to refer to the bio in the source
1015 * chain that contains first un-cloned byte, and *offset will
1016 * contain the offset of that byte within that bio.
1017 */
1018static struct bio *bio_chain_clone_range(struct bio **bio_src,
1019 unsigned int *offset,
1020 unsigned int len,
1021 gfp_t gfpmask)
1022{
1023 struct bio *bi = *bio_src;
1024 unsigned int off = *offset;
1025 struct bio *chain = NULL;
1026 struct bio **end;
1027
1028 /* Build up a chain of clone bios up to the limit */
1029
1030 if (!bi || off >= bi->bi_size || !len)
1031 return NULL; /* Nothing to clone */
602adf40 1032
f7760dad
AE
1033 end = &chain;
1034 while (len) {
1035 unsigned int bi_size;
1036 struct bio *bio;
1037
f5400b7a
AE
1038 if (!bi) {
1039 rbd_warn(NULL, "bio_chain exhausted with %u left", len);
f7760dad 1040 goto out_err; /* EINVAL; ran out of bio's */
f5400b7a 1041 }
f7760dad
AE
1042 bi_size = min_t(unsigned int, bi->bi_size - off, len);
1043 bio = bio_clone_range(bi, off, bi_size, gfpmask);
1044 if (!bio)
1045 goto out_err; /* ENOMEM */
1046
1047 *end = bio;
1048 end = &bio->bi_next;
602adf40 1049
f7760dad
AE
1050 off += bi_size;
1051 if (off == bi->bi_size) {
1052 bi = bi->bi_next;
1053 off = 0;
1054 }
1055 len -= bi_size;
1056 }
1057 *bio_src = bi;
1058 *offset = off;
1059
1060 return chain;
1061out_err:
1062 bio_chain_put(chain);
602adf40 1063
602adf40
YS
1064 return NULL;
1065}
1066
bf0d5f50
AE
1067static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
1068{
37206ee5
AE
1069 dout("%s: obj %p (was %d)\n", __func__, obj_request,
1070 atomic_read(&obj_request->kref.refcount));
bf0d5f50
AE
1071 kref_get(&obj_request->kref);
1072}
1073
1074static void rbd_obj_request_destroy(struct kref *kref);
1075static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
1076{
1077 rbd_assert(obj_request != NULL);
37206ee5
AE
1078 dout("%s: obj %p (was %d)\n", __func__, obj_request,
1079 atomic_read(&obj_request->kref.refcount));
bf0d5f50
AE
1080 kref_put(&obj_request->kref, rbd_obj_request_destroy);
1081}
1082
1083static void rbd_img_request_get(struct rbd_img_request *img_request)
1084{
37206ee5
AE
1085 dout("%s: img %p (was %d)\n", __func__, img_request,
1086 atomic_read(&img_request->kref.refcount));
bf0d5f50
AE
1087 kref_get(&img_request->kref);
1088}
1089
1090static void rbd_img_request_destroy(struct kref *kref);
1091static void rbd_img_request_put(struct rbd_img_request *img_request)
1092{
1093 rbd_assert(img_request != NULL);
37206ee5
AE
1094 dout("%s: img %p (was %d)\n", __func__, img_request,
1095 atomic_read(&img_request->kref.refcount));
bf0d5f50
AE
1096 kref_put(&img_request->kref, rbd_img_request_destroy);
1097}
1098
1099static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
1100 struct rbd_obj_request *obj_request)
1101{
25dcf954
AE
1102 rbd_assert(obj_request->img_request == NULL);
1103
bf0d5f50
AE
1104 rbd_obj_request_get(obj_request);
1105 obj_request->img_request = img_request;
25dcf954 1106 obj_request->which = img_request->obj_request_count;
bf0d5f50 1107 rbd_assert(obj_request->which != BAD_WHICH);
25dcf954
AE
1108 img_request->obj_request_count++;
1109 list_add_tail(&obj_request->links, &img_request->obj_requests);
37206ee5
AE
1110 dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
1111 obj_request->which);
bf0d5f50
AE
1112}
1113
1114static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
1115 struct rbd_obj_request *obj_request)
1116{
1117 rbd_assert(obj_request->which != BAD_WHICH);
25dcf954 1118
37206ee5
AE
1119 dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
1120 obj_request->which);
bf0d5f50 1121 list_del(&obj_request->links);
25dcf954
AE
1122 rbd_assert(img_request->obj_request_count > 0);
1123 img_request->obj_request_count--;
1124 rbd_assert(obj_request->which == img_request->obj_request_count);
1125 obj_request->which = BAD_WHICH;
bf0d5f50 1126 rbd_assert(obj_request->img_request == img_request);
bf0d5f50 1127 obj_request->img_request = NULL;
25dcf954 1128 obj_request->callback = NULL;
bf0d5f50
AE
1129 rbd_obj_request_put(obj_request);
1130}
1131
1132static bool obj_request_type_valid(enum obj_request_type type)
1133{
1134 switch (type) {
9969ebc5 1135 case OBJ_REQUEST_NODATA:
bf0d5f50 1136 case OBJ_REQUEST_BIO:
788e2df3 1137 case OBJ_REQUEST_PAGES:
bf0d5f50
AE
1138 return true;
1139 default:
1140 return false;
1141 }
1142}
1143
cc344fa1 1144static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...)
8d23bf29
AE
1145{
1146 struct ceph_osd_req_op *op;
1147 va_list args;
2647ba38 1148 size_t size;
8d23bf29
AE
1149
1150 op = kzalloc(sizeof (*op), GFP_NOIO);
1151 if (!op)
1152 return NULL;
1153 op->op = opcode;
1154 va_start(args, opcode);
1155 switch (opcode) {
1156 case CEPH_OSD_OP_READ:
1157 case CEPH_OSD_OP_WRITE:
1158 /* rbd_osd_req_op_create(READ, offset, length) */
1159 /* rbd_osd_req_op_create(WRITE, offset, length) */
1160 op->extent.offset = va_arg(args, u64);
1161 op->extent.length = va_arg(args, u64);
1162 if (opcode == CEPH_OSD_OP_WRITE)
1163 op->payload_len = op->extent.length;
1164 break;
fbfab539
AE
1165 case CEPH_OSD_OP_STAT:
1166 break;
2647ba38
AE
1167 case CEPH_OSD_OP_CALL:
1168 /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */
1169 op->cls.class_name = va_arg(args, char *);
1170 size = strlen(op->cls.class_name);
1171 rbd_assert(size <= (size_t) U8_MAX);
1172 op->cls.class_len = size;
1173 op->payload_len = size;
1174
1175 op->cls.method_name = va_arg(args, char *);
1176 size = strlen(op->cls.method_name);
1177 rbd_assert(size <= (size_t) U8_MAX);
1178 op->cls.method_len = size;
1179 op->payload_len += size;
1180
1181 op->cls.argc = 0;
1182 op->cls.indata = va_arg(args, void *);
1183 size = va_arg(args, size_t);
1184 rbd_assert(size <= (size_t) U32_MAX);
1185 op->cls.indata_len = (u32) size;
1186 op->payload_len += size;
1187 break;
5efea49a
AE
1188 case CEPH_OSD_OP_NOTIFY_ACK:
1189 case CEPH_OSD_OP_WATCH:
1190 /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */
1191 /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */
1192 op->watch.cookie = va_arg(args, u64);
1193 op->watch.ver = va_arg(args, u64);
1194 op->watch.ver = cpu_to_le64(op->watch.ver);
1195 if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int))
1196 op->watch.flag = (u8) 1;
1197 break;
8d23bf29
AE
1198 default:
1199 rbd_warn(NULL, "unsupported opcode %hu\n", opcode);
1200 kfree(op);
1201 op = NULL;
1202 break;
1203 }
1204 va_end(args);
1205
1206 return op;
1207}
1208
/* Free an op allocated by rbd_osd_req_op_create(). */
static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op)
{
	kfree(op);
}
bf0d5f50
AE
1214static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
1215 struct rbd_obj_request *obj_request)
1216{
37206ee5
AE
1217 dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
1218
bf0d5f50
AE
1219 return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
1220}
1221
1222static void rbd_img_request_complete(struct rbd_img_request *img_request)
1223{
37206ee5 1224 dout("%s: img %p\n", __func__, img_request);
bf0d5f50
AE
1225 if (img_request->callback)
1226 img_request->callback(img_request);
1227 else
1228 rbd_img_request_put(img_request);
1229}
1230
788e2df3
AE
1231/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
1232
1233static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
1234{
37206ee5
AE
1235 dout("%s: obj %p\n", __func__, obj_request);
1236
788e2df3
AE
1237 return wait_for_completion_interruptible(&obj_request->completion);
1238}
1239
07741308
AE
1240static void obj_request_done_init(struct rbd_obj_request *obj_request)
1241{
1242 atomic_set(&obj_request->done, 0);
1243 smp_wmb();
1244}
1245
1246static void obj_request_done_set(struct rbd_obj_request *obj_request)
1247{
632b88ca
AE
1248 int done;
1249
1250 done = atomic_inc_return(&obj_request->done);
1251 if (done > 1) {
1252 struct rbd_img_request *img_request = obj_request->img_request;
1253 struct rbd_device *rbd_dev;
1254
1255 rbd_dev = img_request ? img_request->rbd_dev : NULL;
1256 rbd_warn(rbd_dev, "obj_request %p was already done\n",
1257 obj_request);
1258 }
07741308
AE
1259}
1260
1261static bool obj_request_done_test(struct rbd_obj_request *obj_request)
1262{
632b88ca 1263 smp_mb();
07741308
AE
1264 return atomic_read(&obj_request->done) != 0;
1265}
1266
/*
 * Completion handler for ops whose replies need no result
 * processing; simply marks the object request done.
 */
static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request,
				struct ceph_osd_op *op)
{
	dout("%s: obj %p\n", __func__, obj_request);
	obj_request_done_set(obj_request);
}
bf0d5f50
AE
1274static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
1275{
37206ee5
AE
1276 dout("%s: obj %p cb %p\n", __func__, obj_request,
1277 obj_request->callback);
bf0d5f50
AE
1278 if (obj_request->callback)
1279 obj_request->callback(obj_request);
788e2df3
AE
1280 else
1281 complete_all(&obj_request->completion);
bf0d5f50
AE
1282}
1283
bf0d5f50
AE
1284static void rbd_osd_read_callback(struct rbd_obj_request *obj_request,
1285 struct ceph_osd_op *op)
1286{
1287 u64 xferred;
1288
1289 /*
1290 * We support a 64-bit length, but ultimately it has to be
1291 * passed to blk_end_request(), which takes an unsigned int.
1292 */
1293 xferred = le64_to_cpu(op->extent.length);
1294 rbd_assert(xferred < (u64) UINT_MAX);
37206ee5
AE
1295 dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
1296 obj_request->result, xferred, obj_request->length);
bf0d5f50
AE
1297 if (obj_request->result == (s32) -ENOENT) {
1298 zero_bio_chain(obj_request->bio_list, 0);
1299 obj_request->result = 0;
1300 } else if (xferred < obj_request->length && !obj_request->result) {
1301 zero_bio_chain(obj_request->bio_list, xferred);
1302 xferred = obj_request->length;
1303 }
1304 obj_request->xferred = xferred;
07741308 1305 obj_request_done_set(obj_request);
bf0d5f50
AE
1306}
1307
1308static void rbd_osd_write_callback(struct rbd_obj_request *obj_request,
1309 struct ceph_osd_op *op)
1310{
37206ee5 1311
bf0d5f50 1312 obj_request->xferred = le64_to_cpu(op->extent.length);
37206ee5
AE
1313 dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
1314 obj_request->result, obj_request->xferred, obj_request->length);
1315
1316 /* A short write really shouldn't occur. Warn if we see one */
1317
1318 if (obj_request->xferred != obj_request->length) {
1319 struct rbd_img_request *img_request = obj_request->img_request;
1320 struct rbd_device *rbd_dev;
1321
1322 rbd_dev = img_request ? img_request->rbd_dev : NULL;
1323 rbd_warn(rbd_dev, "wrote %llu want %llu\n",
1324 obj_request->xferred, obj_request->length);
1325 }
1326
07741308 1327 obj_request_done_set(obj_request);
bf0d5f50
AE
1328}
1329
/*
 * For a simple stat call there's nothing to do.  We'll do more if
 * this is part of a write sequence for a layered image.
 */
static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request,
				struct ceph_osd_op *op)
{
	dout("%s: obj %p\n", __func__, obj_request);
	obj_request_done_set(obj_request);
}
bf0d5f50
AE
1341static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
1342 struct ceph_msg *msg)
1343{
1344 struct rbd_obj_request *obj_request = osd_req->r_priv;
1345 struct ceph_osd_reply_head *reply_head;
1346 struct ceph_osd_op *op;
1347 u32 num_ops;
1348 u16 opcode;
1349
37206ee5 1350 dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
bf0d5f50
AE
1351 rbd_assert(osd_req == obj_request->osd_req);
1352 rbd_assert(!!obj_request->img_request ^
1353 (obj_request->which == BAD_WHICH));
1354
1355 obj_request->xferred = le32_to_cpu(msg->hdr.data_len);
1356 reply_head = msg->front.iov_base;
1357 obj_request->result = (s32) le32_to_cpu(reply_head->result);
1358 obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
1359
1360 num_ops = le32_to_cpu(reply_head->num_ops);
1361 WARN_ON(num_ops != 1); /* For now */
1362
1363 op = &reply_head->ops[0];
1364 opcode = le16_to_cpu(op->op);
1365 switch (opcode) {
1366 case CEPH_OSD_OP_READ:
1367 rbd_osd_read_callback(obj_request, op);
1368 break;
1369 case CEPH_OSD_OP_WRITE:
1370 rbd_osd_write_callback(obj_request, op);
1371 break;
fbfab539
AE
1372 case CEPH_OSD_OP_STAT:
1373 rbd_osd_stat_callback(obj_request, op);
1374 break;
36be9a76 1375 case CEPH_OSD_OP_CALL:
b8d70035 1376 case CEPH_OSD_OP_NOTIFY_ACK:
9969ebc5
AE
1377 case CEPH_OSD_OP_WATCH:
1378 rbd_osd_trivial_callback(obj_request, op);
1379 break;
bf0d5f50
AE
1380 default:
1381 rbd_warn(NULL, "%s: unsupported op %hu\n",
1382 obj_request->object_name, (unsigned short) opcode);
1383 break;
1384 }
1385
07741308 1386 if (obj_request_done_test(obj_request))
bf0d5f50
AE
1387 rbd_obj_request_complete(obj_request);
1388}
1389
1390static struct ceph_osd_request *rbd_osd_req_create(
1391 struct rbd_device *rbd_dev,
1392 bool write_request,
1393 struct rbd_obj_request *obj_request,
1394 struct ceph_osd_req_op *op)
1395{
1396 struct rbd_img_request *img_request = obj_request->img_request;
1397 struct ceph_snap_context *snapc = NULL;
1398 struct ceph_osd_client *osdc;
1399 struct ceph_osd_request *osd_req;
1400 struct timespec now;
1401 struct timespec *mtime;
1402 u64 snap_id = CEPH_NOSNAP;
1403 u64 offset = obj_request->offset;
1404 u64 length = obj_request->length;
1405
1406 if (img_request) {
1407 rbd_assert(img_request->write_request == write_request);
1408 if (img_request->write_request)
1409 snapc = img_request->snapc;
1410 else
1411 snap_id = img_request->snap_id;
1412 }
1413
1414 /* Allocate and initialize the request, for the single op */
1415
1416 osdc = &rbd_dev->rbd_client->client->osdc;
1417 osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC);
1418 if (!osd_req)
1419 return NULL; /* ENOMEM */
1420
1421 rbd_assert(obj_request_type_valid(obj_request->type));
1422 switch (obj_request->type) {
9969ebc5
AE
1423 case OBJ_REQUEST_NODATA:
1424 break; /* Nothing to do */
bf0d5f50
AE
1425 case OBJ_REQUEST_BIO:
1426 rbd_assert(obj_request->bio_list != NULL);
1427 osd_req->r_bio = obj_request->bio_list;
bf0d5f50 1428 break;
788e2df3
AE
1429 case OBJ_REQUEST_PAGES:
1430 osd_req->r_pages = obj_request->pages;
1431 osd_req->r_num_pages = obj_request->page_count;
1432 osd_req->r_page_alignment = offset & ~PAGE_MASK;
1433 break;
bf0d5f50
AE
1434 }
1435
1436 if (write_request) {
1437 osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
1438 now = CURRENT_TIME;
1439 mtime = &now;
1440 } else {
1441 osd_req->r_flags = CEPH_OSD_FLAG_READ;
1442 mtime = NULL; /* not needed for reads */
1443 offset = 0; /* These are not used... */
1444 length = 0; /* ...for osd read requests */
1445 }
1446
1447 osd_req->r_callback = rbd_osd_req_callback;
1448 osd_req->r_priv = obj_request;
1449
1450 osd_req->r_oid_len = strlen(obj_request->object_name);
1451 rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
1452 memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
1453
1454 osd_req->r_file_layout = rbd_dev->layout; /* struct */
1455
1456 /* osd_req will get its own reference to snapc (if non-null) */
1457
1458 ceph_osdc_build_request(osd_req, offset, length, 1, op,
1459 snapc, snap_id, mtime);
1460
1461 return osd_req;
1462}
1463
/* Drop the reference taken on an osd request by rbd_osd_req_create(). */
static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
{
	ceph_osdc_put_request(osd_req);
}
1469/* object_name is assumed to be a non-null pointer and NUL-terminated */
1470
1471static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
1472 u64 offset, u64 length,
1473 enum obj_request_type type)
1474{
1475 struct rbd_obj_request *obj_request;
1476 size_t size;
1477 char *name;
1478
1479 rbd_assert(obj_request_type_valid(type));
1480
1481 size = strlen(object_name) + 1;
1482 obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
1483 if (!obj_request)
1484 return NULL;
1485
1486 name = (char *)(obj_request + 1);
1487 obj_request->object_name = memcpy(name, object_name, size);
1488 obj_request->offset = offset;
1489 obj_request->length = length;
1490 obj_request->which = BAD_WHICH;
1491 obj_request->type = type;
1492 INIT_LIST_HEAD(&obj_request->links);
07741308 1493 obj_request_done_init(obj_request);
788e2df3 1494 init_completion(&obj_request->completion);
bf0d5f50
AE
1495 kref_init(&obj_request->kref);
1496
37206ee5
AE
1497 dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
1498 offset, length, (int)type, obj_request);
1499
bf0d5f50
AE
1500 return obj_request;
1501}
1502
1503static void rbd_obj_request_destroy(struct kref *kref)
1504{
1505 struct rbd_obj_request *obj_request;
1506
1507 obj_request = container_of(kref, struct rbd_obj_request, kref);
1508
37206ee5
AE
1509 dout("%s: obj %p\n", __func__, obj_request);
1510
bf0d5f50
AE
1511 rbd_assert(obj_request->img_request == NULL);
1512 rbd_assert(obj_request->which == BAD_WHICH);
1513
1514 if (obj_request->osd_req)
1515 rbd_osd_req_destroy(obj_request->osd_req);
1516
1517 rbd_assert(obj_request_type_valid(obj_request->type));
1518 switch (obj_request->type) {
9969ebc5
AE
1519 case OBJ_REQUEST_NODATA:
1520 break; /* Nothing to do */
bf0d5f50
AE
1521 case OBJ_REQUEST_BIO:
1522 if (obj_request->bio_list)
1523 bio_chain_put(obj_request->bio_list);
1524 break;
788e2df3
AE
1525 case OBJ_REQUEST_PAGES:
1526 if (obj_request->pages)
1527 ceph_release_page_vector(obj_request->pages,
1528 obj_request->page_count);
1529 break;
bf0d5f50
AE
1530 }
1531
1532 kfree(obj_request);
1533}
1534
1535/*
1536 * Caller is responsible for filling in the list of object requests
1537 * that comprises the image request, and the Linux request pointer
1538 * (if there is one).
1539 */
cc344fa1
AE
1540static struct rbd_img_request *rbd_img_request_create(
1541 struct rbd_device *rbd_dev,
bf0d5f50
AE
1542 u64 offset, u64 length,
1543 bool write_request)
1544{
1545 struct rbd_img_request *img_request;
1546 struct ceph_snap_context *snapc = NULL;
1547
1548 img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
1549 if (!img_request)
1550 return NULL;
1551
1552 if (write_request) {
1553 down_read(&rbd_dev->header_rwsem);
1554 snapc = ceph_get_snap_context(rbd_dev->header.snapc);
1555 up_read(&rbd_dev->header_rwsem);
1556 if (WARN_ON(!snapc)) {
1557 kfree(img_request);
1558 return NULL; /* Shouldn't happen */
1559 }
1560 }
1561
1562 img_request->rq = NULL;
1563 img_request->rbd_dev = rbd_dev;
1564 img_request->offset = offset;
1565 img_request->length = length;
1566 img_request->write_request = write_request;
1567 if (write_request)
1568 img_request->snapc = snapc;
1569 else
1570 img_request->snap_id = rbd_dev->spec->snap_id;
1571 spin_lock_init(&img_request->completion_lock);
1572 img_request->next_completion = 0;
1573 img_request->callback = NULL;
1574 img_request->obj_request_count = 0;
1575 INIT_LIST_HEAD(&img_request->obj_requests);
1576 kref_init(&img_request->kref);
1577
1578 rbd_img_request_get(img_request); /* Avoid a warning */
1579 rbd_img_request_put(img_request); /* TEMPORARY */
1580
37206ee5
AE
1581 dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
1582 write_request ? "write" : "read", offset, length,
1583 img_request);
1584
bf0d5f50
AE
1585 return img_request;
1586}
1587
1588static void rbd_img_request_destroy(struct kref *kref)
1589{
1590 struct rbd_img_request *img_request;
1591 struct rbd_obj_request *obj_request;
1592 struct rbd_obj_request *next_obj_request;
1593
1594 img_request = container_of(kref, struct rbd_img_request, kref);
1595
37206ee5
AE
1596 dout("%s: img %p\n", __func__, img_request);
1597
bf0d5f50
AE
1598 for_each_obj_request_safe(img_request, obj_request, next_obj_request)
1599 rbd_img_obj_request_del(img_request, obj_request);
25dcf954 1600 rbd_assert(img_request->obj_request_count == 0);
bf0d5f50
AE
1601
1602 if (img_request->write_request)
1603 ceph_put_snap_context(img_request->snapc);
1604
1605 kfree(img_request);
1606}
1607
1608static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
1609 struct bio *bio_list)
1610{
1611 struct rbd_device *rbd_dev = img_request->rbd_dev;
1612 struct rbd_obj_request *obj_request = NULL;
1613 struct rbd_obj_request *next_obj_request;
1614 unsigned int bio_offset;
1615 u64 image_offset;
1616 u64 resid;
1617 u16 opcode;
1618
37206ee5
AE
1619 dout("%s: img %p bio %p\n", __func__, img_request, bio_list);
1620
bf0d5f50
AE
1621 opcode = img_request->write_request ? CEPH_OSD_OP_WRITE
1622 : CEPH_OSD_OP_READ;
1623 bio_offset = 0;
1624 image_offset = img_request->offset;
1625 rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT);
1626 resid = img_request->length;
4dda41d3 1627 rbd_assert(resid > 0);
bf0d5f50
AE
1628 while (resid) {
1629 const char *object_name;
1630 unsigned int clone_size;
1631 struct ceph_osd_req_op *op;
1632 u64 offset;
1633 u64 length;
1634
1635 object_name = rbd_segment_name(rbd_dev, image_offset);
1636 if (!object_name)
1637 goto out_unwind;
1638 offset = rbd_segment_offset(rbd_dev, image_offset);
1639 length = rbd_segment_length(rbd_dev, image_offset, resid);
1640 obj_request = rbd_obj_request_create(object_name,
1641 offset, length,
1642 OBJ_REQUEST_BIO);
1643 kfree(object_name); /* object request has its own copy */
1644 if (!obj_request)
1645 goto out_unwind;
1646
1647 rbd_assert(length <= (u64) UINT_MAX);
1648 clone_size = (unsigned int) length;
1649 obj_request->bio_list = bio_chain_clone_range(&bio_list,
1650 &bio_offset, clone_size,
1651 GFP_ATOMIC);
1652 if (!obj_request->bio_list)
1653 goto out_partial;
1654
1655 /*
1656 * Build up the op to use in building the osd
1657 * request. Note that the contents of the op are
1658 * copied by rbd_osd_req_create().
1659 */
1660 op = rbd_osd_req_op_create(opcode, offset, length);
1661 if (!op)
1662 goto out_partial;
1663 obj_request->osd_req = rbd_osd_req_create(rbd_dev,
1664 img_request->write_request,
1665 obj_request, op);
1666 rbd_osd_req_op_destroy(op);
1667 if (!obj_request->osd_req)
1668 goto out_partial;
1669 /* status and version are initially zero-filled */
1670
1671 rbd_img_obj_request_add(img_request, obj_request);
1672
1673 image_offset += length;
1674 resid -= length;
1675 }
1676
1677 return 0;
1678
1679out_partial:
1680 rbd_obj_request_put(obj_request);
1681out_unwind:
1682 for_each_obj_request_safe(img_request, obj_request, next_obj_request)
1683 rbd_obj_request_put(obj_request);
1684
1685 return -ENOMEM;
1686}
1687
1688static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
1689{
1690 struct rbd_img_request *img_request;
1691 u32 which = obj_request->which;
1692 bool more = true;
1693
1694 img_request = obj_request->img_request;
4dda41d3 1695
37206ee5 1696 dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
bf0d5f50
AE
1697 rbd_assert(img_request != NULL);
1698 rbd_assert(img_request->rq != NULL);
4dda41d3 1699 rbd_assert(img_request->obj_request_count > 0);
bf0d5f50
AE
1700 rbd_assert(which != BAD_WHICH);
1701 rbd_assert(which < img_request->obj_request_count);
1702 rbd_assert(which >= img_request->next_completion);
1703
1704 spin_lock_irq(&img_request->completion_lock);
1705 if (which != img_request->next_completion)
1706 goto out;
1707
1708 for_each_obj_request_from(img_request, obj_request) {
1709 unsigned int xferred;
1710 int result;
1711
1712 rbd_assert(more);
1713 rbd_assert(which < img_request->obj_request_count);
1714
07741308 1715 if (!obj_request_done_test(obj_request))
bf0d5f50
AE
1716 break;
1717
1718 rbd_assert(obj_request->xferred <= (u64) UINT_MAX);
1719 xferred = (unsigned int) obj_request->xferred;
1720 result = (int) obj_request->result;
1721 if (result)
1722 rbd_warn(NULL, "obj_request %s result %d xferred %u\n",
1723 img_request->write_request ? "write" : "read",
1724 result, xferred);
1725
1726 more = blk_end_request(img_request->rq, result, xferred);
1727 which++;
1728 }
1729 rbd_assert(more ^ (which == img_request->obj_request_count));
1730 img_request->next_completion = which;
1731out:
1732 spin_unlock_irq(&img_request->completion_lock);
1733
1734 if (!more)
1735 rbd_img_request_complete(img_request);
1736}
1737
1738static int rbd_img_request_submit(struct rbd_img_request *img_request)
1739{
1740 struct rbd_device *rbd_dev = img_request->rbd_dev;
1741 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1742 struct rbd_obj_request *obj_request;
1743
37206ee5 1744 dout("%s: img %p\n", __func__, img_request);
bf0d5f50
AE
1745 for_each_obj_request(img_request, obj_request) {
1746 int ret;
1747
1748 obj_request->callback = rbd_img_obj_callback;
1749 ret = rbd_obj_request_submit(osdc, obj_request);
1750 if (ret)
1751 return ret;
1752 /*
1753 * The image request has its own reference to each
1754 * of its object requests, so we can safely drop the
1755 * initial one here.
1756 */
1757 rbd_obj_request_put(obj_request);
1758 }
1759
1760 return 0;
1761}
1762
cf81b60e 1763static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
b8d70035
AE
1764 u64 ver, u64 notify_id)
1765{
1766 struct rbd_obj_request *obj_request;
1767 struct ceph_osd_req_op *op;
1768 struct ceph_osd_client *osdc;
1769 int ret;
1770
1771 obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
1772 OBJ_REQUEST_NODATA);
1773 if (!obj_request)
1774 return -ENOMEM;
1775
1776 ret = -ENOMEM;
1777 op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver);
1778 if (!op)
1779 goto out;
1780 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
1781 obj_request, op);
1782 rbd_osd_req_op_destroy(op);
1783 if (!obj_request->osd_req)
1784 goto out;
1785
1786 osdc = &rbd_dev->rbd_client->client->osdc;
cf81b60e 1787 obj_request->callback = rbd_obj_request_put;
b8d70035 1788 ret = rbd_obj_request_submit(osdc, obj_request);
b8d70035 1789out:
cf81b60e
AE
1790 if (ret)
1791 rbd_obj_request_put(obj_request);
b8d70035
AE
1792
1793 return ret;
1794}
1795
1796static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
1797{
1798 struct rbd_device *rbd_dev = (struct rbd_device *)data;
1799 u64 hver;
1800 int rc;
1801
1802 if (!rbd_dev)
1803 return;
1804
37206ee5 1805 dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
b8d70035
AE
1806 rbd_dev->header_name, (unsigned long long) notify_id,
1807 (unsigned int) opcode);
1808 rc = rbd_dev_refresh(rbd_dev, &hver);
1809 if (rc)
1810 rbd_warn(rbd_dev, "got notification but failed to "
1811 " update snaps: %d\n", rc);
1812
cf81b60e 1813 rbd_obj_notify_ack(rbd_dev, hver, notify_id);
b8d70035
AE
1814}
1815
9969ebc5
AE
1816/*
1817 * Request sync osd watch/unwatch. The value of "start" determines
1818 * whether a watch request is being initiated or torn down.
1819 */
1820static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
1821{
1822 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1823 struct rbd_obj_request *obj_request;
1824 struct ceph_osd_req_op *op;
1825 int ret;
1826
1827 rbd_assert(start ^ !!rbd_dev->watch_event);
1828 rbd_assert(start ^ !!rbd_dev->watch_request);
1829
1830 if (start) {
3c663bbd 1831 ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
9969ebc5
AE
1832 &rbd_dev->watch_event);
1833 if (ret < 0)
1834 return ret;
8eb87565 1835 rbd_assert(rbd_dev->watch_event != NULL);
9969ebc5
AE
1836 }
1837
1838 ret = -ENOMEM;
1839 obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
1840 OBJ_REQUEST_NODATA);
1841 if (!obj_request)
1842 goto out_cancel;
1843
1844 op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH,
1845 rbd_dev->watch_event->cookie,
1846 rbd_dev->header.obj_version, start);
1847 if (!op)
1848 goto out_cancel;
1849 obj_request->osd_req = rbd_osd_req_create(rbd_dev, true,
1850 obj_request, op);
1851 rbd_osd_req_op_destroy(op);
1852 if (!obj_request->osd_req)
1853 goto out_cancel;
1854
8eb87565 1855 if (start)
975241af 1856 ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
8eb87565 1857 else
6977c3f9 1858 ceph_osdc_unregister_linger_request(osdc,
975241af 1859 rbd_dev->watch_request->osd_req);
9969ebc5
AE
1860 ret = rbd_obj_request_submit(osdc, obj_request);
1861 if (ret)
1862 goto out_cancel;
1863 ret = rbd_obj_request_wait(obj_request);
1864 if (ret)
1865 goto out_cancel;
9969ebc5
AE
1866 ret = obj_request->result;
1867 if (ret)
1868 goto out_cancel;
1869
8eb87565
AE
1870 /*
1871 * A watch request is set to linger, so the underlying osd
1872 * request won't go away until we unregister it. We retain
1873 * a pointer to the object request during that time (in
1874 * rbd_dev->watch_request), so we'll keep a reference to
1875 * it. We'll drop that reference (below) after we've
1876 * unregistered it.
1877 */
1878 if (start) {
1879 rbd_dev->watch_request = obj_request;
1880
1881 return 0;
1882 }
1883
1884 /* We have successfully torn down the watch request */
1885
1886 rbd_obj_request_put(rbd_dev->watch_request);
1887 rbd_dev->watch_request = NULL;
9969ebc5
AE
1888out_cancel:
1889 /* Cancel the event if we're tearing down, or on error */
1890 ceph_osdc_cancel_event(rbd_dev->watch_event);
1891 rbd_dev->watch_event = NULL;
9969ebc5
AE
1892 if (obj_request)
1893 rbd_obj_request_put(obj_request);
1894
1895 return ret;
1896}
1897
36be9a76
AE
1898/*
1899 * Synchronous osd object method call
1900 */
1901static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
1902 const char *object_name,
1903 const char *class_name,
1904 const char *method_name,
1905 const char *outbound,
1906 size_t outbound_size,
1907 char *inbound,
1908 size_t inbound_size,
1909 u64 *version)
1910{
1911 struct rbd_obj_request *obj_request;
1912 struct ceph_osd_client *osdc;
1913 struct ceph_osd_req_op *op;
1914 struct page **pages;
1915 u32 page_count;
1916 int ret;
1917
1918 /*
1919 * Method calls are ultimately read operations but they
1920 * don't involve object data (so no offset or length).
1921 * The result should placed into the inbound buffer
1922 * provided. They also supply outbound data--parameters for
1923 * the object method. Currently if this is present it will
1924 * be a snapshot id.
1925 */
1926 page_count = (u32) calc_pages_for(0, inbound_size);
1927 pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
1928 if (IS_ERR(pages))
1929 return PTR_ERR(pages);
1930
1931 ret = -ENOMEM;
1932 obj_request = rbd_obj_request_create(object_name, 0, 0,
1933 OBJ_REQUEST_PAGES);
1934 if (!obj_request)
1935 goto out;
1936
1937 obj_request->pages = pages;
1938 obj_request->page_count = page_count;
1939
1940 op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name,
1941 method_name, outbound, outbound_size);
1942 if (!op)
1943 goto out;
1944 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
1945 obj_request, op);
1946 rbd_osd_req_op_destroy(op);
1947 if (!obj_request->osd_req)
1948 goto out;
1949
1950 osdc = &rbd_dev->rbd_client->client->osdc;
1951 ret = rbd_obj_request_submit(osdc, obj_request);
1952 if (ret)
1953 goto out;
1954 ret = rbd_obj_request_wait(obj_request);
1955 if (ret)
1956 goto out;
1957
1958 ret = obj_request->result;
1959 if (ret < 0)
1960 goto out;
23ed6e13 1961 ret = 0;
903bb32e 1962 ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
36be9a76
AE
1963 if (version)
1964 *version = obj_request->version;
1965out:
1966 if (obj_request)
1967 rbd_obj_request_put(obj_request);
1968 else
1969 ceph_release_page_vector(pages, page_count);
1970
1971 return ret;
1972}
1973
bf0d5f50 1974static void rbd_request_fn(struct request_queue *q)
cc344fa1 1975 __releases(q->queue_lock) __acquires(q->queue_lock)
bf0d5f50
AE
1976{
1977 struct rbd_device *rbd_dev = q->queuedata;
1978 bool read_only = rbd_dev->mapping.read_only;
1979 struct request *rq;
1980 int result;
1981
1982 while ((rq = blk_fetch_request(q))) {
1983 bool write_request = rq_data_dir(rq) == WRITE;
1984 struct rbd_img_request *img_request;
1985 u64 offset;
1986 u64 length;
1987
1988 /* Ignore any non-FS requests that filter through. */
1989
1990 if (rq->cmd_type != REQ_TYPE_FS) {
4dda41d3
AE
1991 dout("%s: non-fs request type %d\n", __func__,
1992 (int) rq->cmd_type);
1993 __blk_end_request_all(rq, 0);
1994 continue;
1995 }
1996
1997 /* Ignore/skip any zero-length requests */
1998
1999 offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
2000 length = (u64) blk_rq_bytes(rq);
2001
2002 if (!length) {
2003 dout("%s: zero-length request\n", __func__);
bf0d5f50
AE
2004 __blk_end_request_all(rq, 0);
2005 continue;
2006 }
2007
2008 spin_unlock_irq(q->queue_lock);
2009
2010 /* Disallow writes to a read-only device */
2011
2012 if (write_request) {
2013 result = -EROFS;
2014 if (read_only)
2015 goto end_request;
2016 rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
2017 }
2018
6d292906
AE
2019 /*
2020 * Quit early if the mapped snapshot no longer
2021 * exists. It's still possible the snapshot will
2022 * have disappeared by the time our request arrives
2023 * at the osd, but there's no sense in sending it if
2024 * we already know.
2025 */
2026 if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
bf0d5f50
AE
2027 dout("request for non-existent snapshot");
2028 rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
2029 result = -ENXIO;
2030 goto end_request;
2031 }
2032
bf0d5f50
AE
2033 result = -EINVAL;
2034 if (WARN_ON(offset && length > U64_MAX - offset + 1))
2035 goto end_request; /* Shouldn't happen */
2036
2037 result = -ENOMEM;
2038 img_request = rbd_img_request_create(rbd_dev, offset, length,
2039 write_request);
2040 if (!img_request)
2041 goto end_request;
2042
2043 img_request->rq = rq;
2044
2045 result = rbd_img_request_fill_bio(img_request, rq->bio);
2046 if (!result)
2047 result = rbd_img_request_submit(img_request);
2048 if (result)
2049 rbd_img_request_put(img_request);
2050end_request:
2051 spin_lock_irq(q->queue_lock);
2052 if (result < 0) {
2053 rbd_warn(rbd_dev, "obj_request %s result %d\n",
2054 write_request ? "write" : "read", result);
2055 __blk_end_request_all(rq, result);
2056 }
2057 }
2058}
2059
602adf40
YS
2060/*
2061 * a queue callback. Makes sure that we don't create a bio that spans across
2062 * multiple osd objects. One exception would be with a single page bios,
f7760dad 2063 * which we handle later at bio_chain_clone_range()
602adf40
YS
2064 */
2065static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
2066 struct bio_vec *bvec)
2067{
2068 struct rbd_device *rbd_dev = q->queuedata;
e5cfeed2
AE
2069 sector_t sector_offset;
2070 sector_t sectors_per_obj;
2071 sector_t obj_sector_offset;
2072 int ret;
2073
2074 /*
2075 * Find how far into its rbd object the partition-relative
2076 * bio start sector is to offset relative to the enclosing
2077 * device.
2078 */
2079 sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
2080 sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
2081 obj_sector_offset = sector_offset & (sectors_per_obj - 1);
2082
2083 /*
2084 * Compute the number of bytes from that offset to the end
2085 * of the object. Account for what's already used by the bio.
2086 */
2087 ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
2088 if (ret > bmd->bi_size)
2089 ret -= bmd->bi_size;
2090 else
2091 ret = 0;
2092
2093 /*
2094 * Don't send back more than was asked for. And if the bio
2095 * was empty, let the whole thing through because: "Note
2096 * that a block device *must* allow a single page to be
2097 * added to an empty bio."
2098 */
2099 rbd_assert(bvec->bv_len <= PAGE_SIZE);
2100 if (ret > (int) bvec->bv_len || !bmd->bi_size)
2101 ret = (int) bvec->bv_len;
2102
2103 return ret;
602adf40
YS
2104}
2105
2106static void rbd_free_disk(struct rbd_device *rbd_dev)
2107{
2108 struct gendisk *disk = rbd_dev->disk;
2109
2110 if (!disk)
2111 return;
2112
602adf40
YS
2113 if (disk->flags & GENHD_FL_UP)
2114 del_gendisk(disk);
2115 if (disk->queue)
2116 blk_cleanup_queue(disk->queue);
2117 put_disk(disk);
2118}
2119
788e2df3
AE
2120static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
2121 const char *object_name,
2122 u64 offset, u64 length,
2123 char *buf, u64 *version)
2124
2125{
2126 struct ceph_osd_req_op *op;
2127 struct rbd_obj_request *obj_request;
2128 struct ceph_osd_client *osdc;
2129 struct page **pages = NULL;
2130 u32 page_count;
1ceae7ef 2131 size_t size;
788e2df3
AE
2132 int ret;
2133
2134 page_count = (u32) calc_pages_for(offset, length);
2135 pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
2136 if (IS_ERR(pages))
2137 ret = PTR_ERR(pages);
2138
2139 ret = -ENOMEM;
2140 obj_request = rbd_obj_request_create(object_name, offset, length,
36be9a76 2141 OBJ_REQUEST_PAGES);
788e2df3
AE
2142 if (!obj_request)
2143 goto out;
2144
2145 obj_request->pages = pages;
2146 obj_request->page_count = page_count;
2147
2148 op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length);
2149 if (!op)
2150 goto out;
2151 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
2152 obj_request, op);
2153 rbd_osd_req_op_destroy(op);
2154 if (!obj_request->osd_req)
2155 goto out;
2156
2157 osdc = &rbd_dev->rbd_client->client->osdc;
2158 ret = rbd_obj_request_submit(osdc, obj_request);
2159 if (ret)
2160 goto out;
2161 ret = rbd_obj_request_wait(obj_request);
2162 if (ret)
2163 goto out;
2164
2165 ret = obj_request->result;
2166 if (ret < 0)
2167 goto out;
1ceae7ef
AE
2168
2169 rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
2170 size = (size_t) obj_request->xferred;
903bb32e 2171 ceph_copy_from_page_vector(pages, buf, 0, size);
23ed6e13
AE
2172 rbd_assert(size <= (size_t) INT_MAX);
2173 ret = (int) size;
788e2df3
AE
2174 if (version)
2175 *version = obj_request->version;
2176out:
2177 if (obj_request)
2178 rbd_obj_request_put(obj_request);
2179 else
2180 ceph_release_page_vector(pages, page_count);
2181
2182 return ret;
2183}
2184
602adf40 2185/*
4156d998
AE
2186 * Read the complete header for the given rbd device.
2187 *
2188 * Returns a pointer to a dynamically-allocated buffer containing
2189 * the complete and validated header. Caller can pass the address
2190 * of a variable that will be filled in with the version of the
2191 * header object at the time it was read.
2192 *
2193 * Returns a pointer-coded errno if a failure occurs.
602adf40 2194 */
4156d998
AE
2195static struct rbd_image_header_ondisk *
2196rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
602adf40 2197{
4156d998 2198 struct rbd_image_header_ondisk *ondisk = NULL;
50f7c4c9 2199 u32 snap_count = 0;
4156d998
AE
2200 u64 names_size = 0;
2201 u32 want_count;
2202 int ret;
602adf40 2203
00f1f36f 2204 /*
4156d998
AE
2205 * The complete header will include an array of its 64-bit
2206 * snapshot ids, followed by the names of those snapshots as
2207 * a contiguous block of NUL-terminated strings. Note that
2208 * the number of snapshots could change by the time we read
2209 * it in, in which case we re-read it.
00f1f36f 2210 */
4156d998
AE
2211 do {
2212 size_t size;
2213
2214 kfree(ondisk);
2215
2216 size = sizeof (*ondisk);
2217 size += snap_count * sizeof (struct rbd_image_snap_ondisk);
2218 size += names_size;
2219 ondisk = kmalloc(size, GFP_KERNEL);
2220 if (!ondisk)
2221 return ERR_PTR(-ENOMEM);
2222
788e2df3 2223 ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
4156d998
AE
2224 0, size,
2225 (char *) ondisk, version);
4156d998
AE
2226 if (ret < 0)
2227 goto out_err;
2228 if (WARN_ON((size_t) ret < size)) {
2229 ret = -ENXIO;
06ecc6cb
AE
2230 rbd_warn(rbd_dev, "short header read (want %zd got %d)",
2231 size, ret);
4156d998
AE
2232 goto out_err;
2233 }
2234 if (!rbd_dev_ondisk_valid(ondisk)) {
2235 ret = -ENXIO;
06ecc6cb 2236 rbd_warn(rbd_dev, "invalid header");
4156d998 2237 goto out_err;
81e759fb 2238 }
602adf40 2239
4156d998
AE
2240 names_size = le64_to_cpu(ondisk->snap_names_len);
2241 want_count = snap_count;
2242 snap_count = le32_to_cpu(ondisk->snap_count);
2243 } while (snap_count != want_count);
00f1f36f 2244
4156d998 2245 return ondisk;
00f1f36f 2246
4156d998
AE
2247out_err:
2248 kfree(ondisk);
2249
2250 return ERR_PTR(ret);
2251}
2252
2253/*
2254 * reload the ondisk the header
2255 */
2256static int rbd_read_header(struct rbd_device *rbd_dev,
2257 struct rbd_image_header *header)
2258{
2259 struct rbd_image_header_ondisk *ondisk;
2260 u64 ver = 0;
2261 int ret;
602adf40 2262
4156d998
AE
2263 ondisk = rbd_dev_v1_header_read(rbd_dev, &ver);
2264 if (IS_ERR(ondisk))
2265 return PTR_ERR(ondisk);
2266 ret = rbd_header_from_disk(header, ondisk);
2267 if (ret >= 0)
2268 header->obj_version = ver;
2269 kfree(ondisk);
2270
2271 return ret;
602adf40
YS
2272}
2273
41f38c2b 2274static void rbd_remove_all_snaps(struct rbd_device *rbd_dev)
dfc5606d
YS
2275{
2276 struct rbd_snap *snap;
a0593290 2277 struct rbd_snap *next;
dfc5606d 2278
a0593290 2279 list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
41f38c2b 2280 rbd_remove_snap_dev(snap);
dfc5606d
YS
2281}
2282
9478554a
AE
2283static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
2284{
2285 sector_t size;
2286
0d7dbfce 2287 if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
9478554a
AE
2288 return;
2289
2290 size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE;
2291 dout("setting size to %llu sectors", (unsigned long long) size);
2292 rbd_dev->mapping.size = (u64) size;
2293 set_capacity(rbd_dev->disk, size);
2294}
2295
602adf40
YS
2296/*
2297 * only read the first part of the ondisk header, without the snaps info
2298 */
117973fb 2299static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver)
602adf40
YS
2300{
2301 int ret;
2302 struct rbd_image_header h;
602adf40
YS
2303
2304 ret = rbd_read_header(rbd_dev, &h);
2305 if (ret < 0)
2306 return ret;
2307
a51aa0c0
JD
2308 down_write(&rbd_dev->header_rwsem);
2309
9478554a
AE
2310 /* Update image size, and check for resize of mapped image */
2311 rbd_dev->header.image_size = h.image_size;
2312 rbd_update_mapping_size(rbd_dev);
9db4b3e3 2313
849b4260 2314 /* rbd_dev->header.object_prefix shouldn't change */
602adf40 2315 kfree(rbd_dev->header.snap_sizes);
849b4260 2316 kfree(rbd_dev->header.snap_names);
d1d25646
JD
2317 /* osd requests may still refer to snapc */
2318 ceph_put_snap_context(rbd_dev->header.snapc);
602adf40 2319
b813623a
AE
2320 if (hver)
2321 *hver = h.obj_version;
a71b891b 2322 rbd_dev->header.obj_version = h.obj_version;
93a24e08 2323 rbd_dev->header.image_size = h.image_size;
602adf40
YS
2324 rbd_dev->header.snapc = h.snapc;
2325 rbd_dev->header.snap_names = h.snap_names;
2326 rbd_dev->header.snap_sizes = h.snap_sizes;
849b4260
AE
2327 /* Free the extra copy of the object prefix */
2328 WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
2329 kfree(h.object_prefix);
2330
304f6808
AE
2331 ret = rbd_dev_snaps_update(rbd_dev);
2332 if (!ret)
2333 ret = rbd_dev_snaps_register(rbd_dev);
dfc5606d 2334
c666601a 2335 up_write(&rbd_dev->header_rwsem);
602adf40 2336
dfc5606d 2337 return ret;
602adf40
YS
2338}
2339
117973fb 2340static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver)
1fe5e993
AE
2341{
2342 int ret;
2343
117973fb 2344 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
1fe5e993 2345 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
117973fb
AE
2346 if (rbd_dev->image_format == 1)
2347 ret = rbd_dev_v1_refresh(rbd_dev, hver);
2348 else
2349 ret = rbd_dev_v2_refresh(rbd_dev, hver);
1fe5e993
AE
2350 mutex_unlock(&ctl_mutex);
2351
2352 return ret;
2353}
2354
602adf40
YS
2355static int rbd_init_disk(struct rbd_device *rbd_dev)
2356{
2357 struct gendisk *disk;
2358 struct request_queue *q;
593a9e7b 2359 u64 segment_size;
602adf40 2360
602adf40 2361 /* create gendisk info */
602adf40
YS
2362 disk = alloc_disk(RBD_MINORS_PER_MAJOR);
2363 if (!disk)
1fcdb8aa 2364 return -ENOMEM;
602adf40 2365
f0f8cef5 2366 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
de71a297 2367 rbd_dev->dev_id);
602adf40
YS
2368 disk->major = rbd_dev->major;
2369 disk->first_minor = 0;
2370 disk->fops = &rbd_bd_ops;
2371 disk->private_data = rbd_dev;
2372
bf0d5f50 2373 q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
602adf40
YS
2374 if (!q)
2375 goto out_disk;
029bcbd8 2376
593a9e7b
AE
2377 /* We use the default size, but let's be explicit about it. */
2378 blk_queue_physical_block_size(q, SECTOR_SIZE);
2379
029bcbd8 2380 /* set io sizes to object size */
593a9e7b
AE
2381 segment_size = rbd_obj_bytes(&rbd_dev->header);
2382 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
2383 blk_queue_max_segment_size(q, segment_size);
2384 blk_queue_io_min(q, segment_size);
2385 blk_queue_io_opt(q, segment_size);
029bcbd8 2386
602adf40
YS
2387 blk_queue_merge_bvec(q, rbd_merge_bvec);
2388 disk->queue = q;
2389
2390 q->queuedata = rbd_dev;
2391
2392 rbd_dev->disk = disk;
602adf40 2393
12f02944
AE
2394 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
2395
602adf40 2396 return 0;
602adf40
YS
2397out_disk:
2398 put_disk(disk);
1fcdb8aa
AE
2399
2400 return -ENOMEM;
602adf40
YS
2401}
2402
dfc5606d
YS
2403/*
2404 sysfs
2405*/
2406
593a9e7b
AE
2407static struct rbd_device *dev_to_rbd_dev(struct device *dev)
2408{
2409 return container_of(dev, struct rbd_device, dev);
2410}
2411
dfc5606d
YS
2412static ssize_t rbd_size_show(struct device *dev,
2413 struct device_attribute *attr, char *buf)
2414{
593a9e7b 2415 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
a51aa0c0
JD
2416 sector_t size;
2417
2418 down_read(&rbd_dev->header_rwsem);
2419 size = get_capacity(rbd_dev->disk);
2420 up_read(&rbd_dev->header_rwsem);
dfc5606d 2421
a51aa0c0 2422 return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
dfc5606d
YS
2423}
2424
34b13184
AE
2425/*
2426 * Note this shows the features for whatever's mapped, which is not
2427 * necessarily the base image.
2428 */
2429static ssize_t rbd_features_show(struct device *dev,
2430 struct device_attribute *attr, char *buf)
2431{
2432 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2433
2434 return sprintf(buf, "0x%016llx\n",
2435 (unsigned long long) rbd_dev->mapping.features);
2436}
2437
dfc5606d
YS
2438static ssize_t rbd_major_show(struct device *dev,
2439 struct device_attribute *attr, char *buf)
2440{
593a9e7b 2441 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
602adf40 2442
dfc5606d
YS
2443 return sprintf(buf, "%d\n", rbd_dev->major);
2444}
2445
2446static ssize_t rbd_client_id_show(struct device *dev,
2447 struct device_attribute *attr, char *buf)
602adf40 2448{
593a9e7b 2449 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
dfc5606d 2450
1dbb4399
AE
2451 return sprintf(buf, "client%lld\n",
2452 ceph_client_id(rbd_dev->rbd_client->client));
602adf40
YS
2453}
2454
dfc5606d
YS
2455static ssize_t rbd_pool_show(struct device *dev,
2456 struct device_attribute *attr, char *buf)
602adf40 2457{
593a9e7b 2458 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
dfc5606d 2459
0d7dbfce 2460 return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
dfc5606d
YS
2461}
2462
9bb2f334
AE
2463static ssize_t rbd_pool_id_show(struct device *dev,
2464 struct device_attribute *attr, char *buf)
2465{
2466 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2467
0d7dbfce
AE
2468 return sprintf(buf, "%llu\n",
2469 (unsigned long long) rbd_dev->spec->pool_id);
9bb2f334
AE
2470}
2471
dfc5606d
YS
2472static ssize_t rbd_name_show(struct device *dev,
2473 struct device_attribute *attr, char *buf)
2474{
593a9e7b 2475 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
dfc5606d 2476
a92ffdf8
AE
2477 if (rbd_dev->spec->image_name)
2478 return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
2479
2480 return sprintf(buf, "(unknown)\n");
dfc5606d
YS
2481}
2482
589d30e0
AE
2483static ssize_t rbd_image_id_show(struct device *dev,
2484 struct device_attribute *attr, char *buf)
2485{
2486 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2487
0d7dbfce 2488 return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
589d30e0
AE
2489}
2490
34b13184
AE
2491/*
2492 * Shows the name of the currently-mapped snapshot (or
2493 * RBD_SNAP_HEAD_NAME for the base image).
2494 */
dfc5606d
YS
2495static ssize_t rbd_snap_show(struct device *dev,
2496 struct device_attribute *attr,
2497 char *buf)
2498{
593a9e7b 2499 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
dfc5606d 2500
0d7dbfce 2501 return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
dfc5606d
YS
2502}
2503
86b00e0d
AE
2504/*
2505 * For an rbd v2 image, shows the pool id, image id, and snapshot id
2506 * for the parent image. If there is no parent, simply shows
2507 * "(no parent image)".
2508 */
2509static ssize_t rbd_parent_show(struct device *dev,
2510 struct device_attribute *attr,
2511 char *buf)
2512{
2513 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2514 struct rbd_spec *spec = rbd_dev->parent_spec;
2515 int count;
2516 char *bufp = buf;
2517
2518 if (!spec)
2519 return sprintf(buf, "(no parent image)\n");
2520
2521 count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
2522 (unsigned long long) spec->pool_id, spec->pool_name);
2523 if (count < 0)
2524 return count;
2525 bufp += count;
2526
2527 count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
2528 spec->image_name ? spec->image_name : "(unknown)");
2529 if (count < 0)
2530 return count;
2531 bufp += count;
2532
2533 count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
2534 (unsigned long long) spec->snap_id, spec->snap_name);
2535 if (count < 0)
2536 return count;
2537 bufp += count;
2538
2539 count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
2540 if (count < 0)
2541 return count;
2542 bufp += count;
2543
2544 return (ssize_t) (bufp - buf);
2545}
2546
dfc5606d
YS
2547static ssize_t rbd_image_refresh(struct device *dev,
2548 struct device_attribute *attr,
2549 const char *buf,
2550 size_t size)
2551{
593a9e7b 2552 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
b813623a 2553 int ret;
602adf40 2554
117973fb 2555 ret = rbd_dev_refresh(rbd_dev, NULL);
b813623a
AE
2556
2557 return ret < 0 ? ret : size;
dfc5606d 2558}
602adf40 2559
dfc5606d 2560static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
34b13184 2561static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
dfc5606d
YS
2562static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
2563static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
2564static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
9bb2f334 2565static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
dfc5606d 2566static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
589d30e0 2567static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
dfc5606d
YS
2568static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
2569static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
86b00e0d 2570static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
dfc5606d
YS
2571
2572static struct attribute *rbd_attrs[] = {
2573 &dev_attr_size.attr,
34b13184 2574 &dev_attr_features.attr,
dfc5606d
YS
2575 &dev_attr_major.attr,
2576 &dev_attr_client_id.attr,
2577 &dev_attr_pool.attr,
9bb2f334 2578 &dev_attr_pool_id.attr,
dfc5606d 2579 &dev_attr_name.attr,
589d30e0 2580 &dev_attr_image_id.attr,
dfc5606d 2581 &dev_attr_current_snap.attr,
86b00e0d 2582 &dev_attr_parent.attr,
dfc5606d 2583 &dev_attr_refresh.attr,
dfc5606d
YS
2584 NULL
2585};
2586
2587static struct attribute_group rbd_attr_group = {
2588 .attrs = rbd_attrs,
2589};
2590
2591static const struct attribute_group *rbd_attr_groups[] = {
2592 &rbd_attr_group,
2593 NULL
2594};
2595
2596static void rbd_sysfs_dev_release(struct device *dev)
2597{
2598}
2599
2600static struct device_type rbd_device_type = {
2601 .name = "rbd",
2602 .groups = rbd_attr_groups,
2603 .release = rbd_sysfs_dev_release,
2604};
2605
2606
2607/*
2608 sysfs - snapshots
2609*/
2610
2611static ssize_t rbd_snap_size_show(struct device *dev,
2612 struct device_attribute *attr,
2613 char *buf)
2614{
2615 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2616
3591538f 2617 return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
dfc5606d
YS
2618}
2619
2620static ssize_t rbd_snap_id_show(struct device *dev,
2621 struct device_attribute *attr,
2622 char *buf)
2623{
2624 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2625
3591538f 2626 return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
dfc5606d
YS
2627}
2628
34b13184
AE
2629static ssize_t rbd_snap_features_show(struct device *dev,
2630 struct device_attribute *attr,
2631 char *buf)
2632{
2633 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2634
2635 return sprintf(buf, "0x%016llx\n",
2636 (unsigned long long) snap->features);
2637}
2638
dfc5606d
YS
2639static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
2640static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
34b13184 2641static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL);
dfc5606d
YS
2642
2643static struct attribute *rbd_snap_attrs[] = {
2644 &dev_attr_snap_size.attr,
2645 &dev_attr_snap_id.attr,
34b13184 2646 &dev_attr_snap_features.attr,
dfc5606d
YS
2647 NULL,
2648};
2649
2650static struct attribute_group rbd_snap_attr_group = {
2651 .attrs = rbd_snap_attrs,
2652};
2653
2654static void rbd_snap_dev_release(struct device *dev)
2655{
2656 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2657 kfree(snap->name);
2658 kfree(snap);
2659}
2660
2661static const struct attribute_group *rbd_snap_attr_groups[] = {
2662 &rbd_snap_attr_group,
2663 NULL
2664};
2665
2666static struct device_type rbd_snap_device_type = {
2667 .groups = rbd_snap_attr_groups,
2668 .release = rbd_snap_dev_release,
2669};
2670
8b8fb99c
AE
2671static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
2672{
2673 kref_get(&spec->kref);
2674
2675 return spec;
2676}
2677
2678static void rbd_spec_free(struct kref *kref);
2679static void rbd_spec_put(struct rbd_spec *spec)
2680{
2681 if (spec)
2682 kref_put(&spec->kref, rbd_spec_free);
2683}
2684
2685static struct rbd_spec *rbd_spec_alloc(void)
2686{
2687 struct rbd_spec *spec;
2688
2689 spec = kzalloc(sizeof (*spec), GFP_KERNEL);
2690 if (!spec)
2691 return NULL;
2692 kref_init(&spec->kref);
2693
2694 rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */
2695
2696 return spec;
2697}
2698
2699static void rbd_spec_free(struct kref *kref)
2700{
2701 struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
2702
2703 kfree(spec->pool_name);
2704 kfree(spec->image_id);
2705 kfree(spec->image_name);
2706 kfree(spec->snap_name);
2707 kfree(spec);
2708}
2709
cc344fa1 2710static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
c53d5893
AE
2711 struct rbd_spec *spec)
2712{
2713 struct rbd_device *rbd_dev;
2714
2715 rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL);
2716 if (!rbd_dev)
2717 return NULL;
2718
2719 spin_lock_init(&rbd_dev->lock);
6d292906 2720 rbd_dev->flags = 0;
c53d5893
AE
2721 INIT_LIST_HEAD(&rbd_dev->node);
2722 INIT_LIST_HEAD(&rbd_dev->snaps);
2723 init_rwsem(&rbd_dev->header_rwsem);
2724
2725 rbd_dev->spec = spec;
2726 rbd_dev->rbd_client = rbdc;
2727
0903e875
AE
2728 /* Initialize the layout used for all rbd requests */
2729
2730 rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
2731 rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
2732 rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
2733 rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
2734
c53d5893
AE
2735 return rbd_dev;
2736}
2737
2738static void rbd_dev_destroy(struct rbd_device *rbd_dev)
2739{
86b00e0d 2740 rbd_spec_put(rbd_dev->parent_spec);
c53d5893
AE
2741 kfree(rbd_dev->header_name);
2742 rbd_put_client(rbd_dev->rbd_client);
2743 rbd_spec_put(rbd_dev->spec);
2744 kfree(rbd_dev);
2745}
2746
304f6808
AE
2747static bool rbd_snap_registered(struct rbd_snap *snap)
2748{
2749 bool ret = snap->dev.type == &rbd_snap_device_type;
2750 bool reg = device_is_registered(&snap->dev);
2751
2752 rbd_assert(!ret ^ reg);
2753
2754 return ret;
2755}
2756
41f38c2b 2757static void rbd_remove_snap_dev(struct rbd_snap *snap)
dfc5606d
YS
2758{
2759 list_del(&snap->node);
304f6808
AE
2760 if (device_is_registered(&snap->dev))
2761 device_unregister(&snap->dev);
dfc5606d
YS
2762}
2763
14e7085d 2764static int rbd_register_snap_dev(struct rbd_snap *snap,
dfc5606d
YS
2765 struct device *parent)
2766{
2767 struct device *dev = &snap->dev;
2768 int ret;
2769
2770 dev->type = &rbd_snap_device_type;
2771 dev->parent = parent;
2772 dev->release = rbd_snap_dev_release;
d4b125e9 2773 dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name);
304f6808
AE
2774 dout("%s: registering device for snapshot %s\n", __func__, snap->name);
2775
dfc5606d
YS
2776 ret = device_register(dev);
2777
2778 return ret;
2779}
2780
4e891e0a 2781static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
c8d18425 2782 const char *snap_name,
34b13184
AE
2783 u64 snap_id, u64 snap_size,
2784 u64 snap_features)
dfc5606d 2785{
4e891e0a 2786 struct rbd_snap *snap;
dfc5606d 2787 int ret;
4e891e0a
AE
2788
2789 snap = kzalloc(sizeof (*snap), GFP_KERNEL);
dfc5606d 2790 if (!snap)
4e891e0a
AE
2791 return ERR_PTR(-ENOMEM);
2792
2793 ret = -ENOMEM;
c8d18425 2794 snap->name = kstrdup(snap_name, GFP_KERNEL);
4e891e0a
AE
2795 if (!snap->name)
2796 goto err;
2797
c8d18425
AE
2798 snap->id = snap_id;
2799 snap->size = snap_size;
34b13184 2800 snap->features = snap_features;
4e891e0a
AE
2801
2802 return snap;
2803
dfc5606d
YS
2804err:
2805 kfree(snap->name);
2806 kfree(snap);
4e891e0a
AE
2807
2808 return ERR_PTR(ret);
dfc5606d
YS
2809}
2810
cd892126
AE
2811static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which,
2812 u64 *snap_size, u64 *snap_features)
2813{
2814 char *snap_name;
2815
2816 rbd_assert(which < rbd_dev->header.snapc->num_snaps);
2817
2818 *snap_size = rbd_dev->header.snap_sizes[which];
2819 *snap_features = 0; /* No features for v1 */
2820
2821 /* Skip over names until we find the one we are looking for */
2822
2823 snap_name = rbd_dev->header.snap_names;
2824 while (which--)
2825 snap_name += strlen(snap_name) + 1;
2826
2827 return snap_name;
2828}
2829
9d475de5
AE
2830/*
2831 * Get the size and object order for an image snapshot, or if
2832 * snap_id is CEPH_NOSNAP, gets this information for the base
2833 * image.
2834 */
2835static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
2836 u8 *order, u64 *snap_size)
2837{
2838 __le64 snapid = cpu_to_le64(snap_id);
2839 int ret;
2840 struct {
2841 u8 order;
2842 __le64 size;
2843 } __attribute__ ((packed)) size_buf = { 0 };
2844
36be9a76 2845 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
9d475de5
AE
2846 "rbd", "get_size",
2847 (char *) &snapid, sizeof (snapid),
07b2391f 2848 (char *) &size_buf, sizeof (size_buf), NULL);
36be9a76 2849 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
9d475de5
AE
2850 if (ret < 0)
2851 return ret;
2852
2853 *order = size_buf.order;
2854 *snap_size = le64_to_cpu(size_buf.size);
2855
2856 dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n",
2857 (unsigned long long) snap_id, (unsigned int) *order,
2858 (unsigned long long) *snap_size);
2859
2860 return 0;
2861}
2862
2863static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
2864{
2865 return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
2866 &rbd_dev->header.obj_order,
2867 &rbd_dev->header.image_size);
2868}
2869
1e130199
AE
2870static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
2871{
2872 void *reply_buf;
2873 int ret;
2874 void *p;
2875
2876 reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
2877 if (!reply_buf)
2878 return -ENOMEM;
2879
36be9a76 2880 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
1e130199
AE
2881 "rbd", "get_object_prefix",
2882 NULL, 0,
07b2391f 2883 reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL);
36be9a76 2884 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
1e130199
AE
2885 if (ret < 0)
2886 goto out;
2887
2888 p = reply_buf;
2889 rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
2890 p + RBD_OBJ_PREFIX_LEN_MAX,
2891 NULL, GFP_NOIO);
2892
2893 if (IS_ERR(rbd_dev->header.object_prefix)) {
2894 ret = PTR_ERR(rbd_dev->header.object_prefix);
2895 rbd_dev->header.object_prefix = NULL;
2896 } else {
2897 dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
2898 }
2899
2900out:
2901 kfree(reply_buf);
2902
2903 return ret;
2904}
2905
b1b5402a
AE
2906static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
2907 u64 *snap_features)
2908{
2909 __le64 snapid = cpu_to_le64(snap_id);
2910 struct {
2911 __le64 features;
2912 __le64 incompat;
2913 } features_buf = { 0 };
d889140c 2914 u64 incompat;
b1b5402a
AE
2915 int ret;
2916
36be9a76 2917 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
b1b5402a
AE
2918 "rbd", "get_features",
2919 (char *) &snapid, sizeof (snapid),
2920 (char *) &features_buf, sizeof (features_buf),
07b2391f 2921 NULL);
36be9a76 2922 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
b1b5402a
AE
2923 if (ret < 0)
2924 return ret;
d889140c
AE
2925
2926 incompat = le64_to_cpu(features_buf.incompat);
2927 if (incompat & ~RBD_FEATURES_ALL)
b8f5c6ed 2928 return -ENXIO;
d889140c 2929
b1b5402a
AE
2930 *snap_features = le64_to_cpu(features_buf.features);
2931
2932 dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
2933 (unsigned long long) snap_id,
2934 (unsigned long long) *snap_features,
2935 (unsigned long long) le64_to_cpu(features_buf.incompat));
2936
2937 return 0;
2938}
2939
2940static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
2941{
2942 return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
2943 &rbd_dev->header.features);
2944}
2945
86b00e0d
AE
2946static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
2947{
2948 struct rbd_spec *parent_spec;
2949 size_t size;
2950 void *reply_buf = NULL;
2951 __le64 snapid;
2952 void *p;
2953 void *end;
2954 char *image_id;
2955 u64 overlap;
86b00e0d
AE
2956 int ret;
2957
2958 parent_spec = rbd_spec_alloc();
2959 if (!parent_spec)
2960 return -ENOMEM;
2961
2962 size = sizeof (__le64) + /* pool_id */
2963 sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */
2964 sizeof (__le64) + /* snap_id */
2965 sizeof (__le64); /* overlap */
2966 reply_buf = kmalloc(size, GFP_KERNEL);
2967 if (!reply_buf) {
2968 ret = -ENOMEM;
2969 goto out_err;
2970 }
2971
2972 snapid = cpu_to_le64(CEPH_NOSNAP);
36be9a76 2973 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
86b00e0d
AE
2974 "rbd", "get_parent",
2975 (char *) &snapid, sizeof (snapid),
07b2391f 2976 (char *) reply_buf, size, NULL);
36be9a76 2977 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
86b00e0d
AE
2978 if (ret < 0)
2979 goto out_err;
2980
2981 ret = -ERANGE;
2982 p = reply_buf;
2983 end = (char *) reply_buf + size;
2984 ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
2985 if (parent_spec->pool_id == CEPH_NOPOOL)
2986 goto out; /* No parent? No problem. */
2987
0903e875
AE
2988 /* The ceph file layout needs to fit pool id in 32 bits */
2989
2990 ret = -EIO;
2991 if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX))
2992 goto out;
2993
979ed480 2994 image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
86b00e0d
AE
2995 if (IS_ERR(image_id)) {
2996 ret = PTR_ERR(image_id);
2997 goto out_err;
2998 }
2999 parent_spec->image_id = image_id;
3000 ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
3001 ceph_decode_64_safe(&p, end, overlap, out_err);
3002
3003 rbd_dev->parent_overlap = overlap;
3004 rbd_dev->parent_spec = parent_spec;
3005 parent_spec = NULL; /* rbd_dev now owns this */
3006out:
3007 ret = 0;
3008out_err:
3009 kfree(reply_buf);
3010 rbd_spec_put(parent_spec);
3011
3012 return ret;
3013}
3014
9e15b77d
AE
3015static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
3016{
3017 size_t image_id_size;
3018 char *image_id;
3019 void *p;
3020 void *end;
3021 size_t size;
3022 void *reply_buf = NULL;
3023 size_t len = 0;
3024 char *image_name = NULL;
3025 int ret;
3026
3027 rbd_assert(!rbd_dev->spec->image_name);
3028
69e7a02f
AE
3029 len = strlen(rbd_dev->spec->image_id);
3030 image_id_size = sizeof (__le32) + len;
9e15b77d
AE
3031 image_id = kmalloc(image_id_size, GFP_KERNEL);
3032 if (!image_id)
3033 return NULL;
3034
3035 p = image_id;
3036 end = (char *) image_id + image_id_size;
69e7a02f 3037 ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len);
9e15b77d
AE
3038
3039 size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
3040 reply_buf = kmalloc(size, GFP_KERNEL);
3041 if (!reply_buf)
3042 goto out;
3043
36be9a76 3044 ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
9e15b77d
AE
3045 "rbd", "dir_get_name",
3046 image_id, image_id_size,
07b2391f 3047 (char *) reply_buf, size, NULL);
9e15b77d
AE
3048 if (ret < 0)
3049 goto out;
3050 p = reply_buf;
3051 end = (char *) reply_buf + size;
3052 image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
3053 if (IS_ERR(image_name))
3054 image_name = NULL;
3055 else
3056 dout("%s: name is %s len is %zd\n", __func__, image_name, len);
3057out:
3058 kfree(reply_buf);
3059 kfree(image_id);
3060
3061 return image_name;
3062}
3063
3064/*
3065 * When a parent image gets probed, we only have the pool, image,
3066 * and snapshot ids but not the names of any of them. This call
3067 * is made later to fill in those names. It has to be done after
3068 * rbd_dev_snaps_update() has completed because some of the
3069 * information (in particular, snapshot name) is not available
3070 * until then.
3071 */
3072static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
3073{
3074 struct ceph_osd_client *osdc;
3075 const char *name;
3076 void *reply_buf = NULL;
3077 int ret;
3078
3079 if (rbd_dev->spec->pool_name)
3080 return 0; /* Already have the names */
3081
3082 /* Look up the pool name */
3083
3084 osdc = &rbd_dev->rbd_client->client->osdc;
3085 name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id);
935dc89f
AE
3086 if (!name) {
3087 rbd_warn(rbd_dev, "there is no pool with id %llu",
3088 rbd_dev->spec->pool_id); /* Really a BUG() */
3089 return -EIO;
3090 }
9e15b77d
AE
3091
3092 rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL);
3093 if (!rbd_dev->spec->pool_name)
3094 return -ENOMEM;
3095
3096 /* Fetch the image name; tolerate failure here */
3097
3098 name = rbd_dev_image_name(rbd_dev);
69e7a02f 3099 if (name)
9e15b77d 3100 rbd_dev->spec->image_name = (char *) name;
69e7a02f 3101 else
06ecc6cb 3102 rbd_warn(rbd_dev, "unable to get image name");
9e15b77d
AE
3103
3104 /* Look up the snapshot name. */
3105
3106 name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id);
3107 if (!name) {
935dc89f
AE
3108 rbd_warn(rbd_dev, "no snapshot with id %llu",
3109 rbd_dev->spec->snap_id); /* Really a BUG() */
9e15b77d
AE
3110 ret = -EIO;
3111 goto out_err;
3112 }
3113 rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL);
3114 if(!rbd_dev->spec->snap_name)
3115 goto out_err;
3116
3117 return 0;
3118out_err:
3119 kfree(reply_buf);
3120 kfree(rbd_dev->spec->pool_name);
3121 rbd_dev->spec->pool_name = NULL;
3122
3123 return ret;
3124}
3125
6e14b1a6 3126static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
35d489f9
AE
3127{
3128 size_t size;
3129 int ret;
3130 void *reply_buf;
3131 void *p;
3132 void *end;
3133 u64 seq;
3134 u32 snap_count;
3135 struct ceph_snap_context *snapc;
3136 u32 i;
3137
3138 /*
3139 * We'll need room for the seq value (maximum snapshot id),
3140 * snapshot count, and array of that many snapshot ids.
3141 * For now we have a fixed upper limit on the number we're
3142 * prepared to receive.
3143 */
3144 size = sizeof (__le64) + sizeof (__le32) +
3145 RBD_MAX_SNAP_COUNT * sizeof (__le64);
3146 reply_buf = kzalloc(size, GFP_KERNEL);
3147 if (!reply_buf)
3148 return -ENOMEM;
3149
36be9a76 3150 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
35d489f9
AE
3151 "rbd", "get_snapcontext",
3152 NULL, 0,
07b2391f 3153 reply_buf, size, ver);
36be9a76 3154 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
35d489f9
AE
3155 if (ret < 0)
3156 goto out;
3157
3158 ret = -ERANGE;
3159 p = reply_buf;
3160 end = (char *) reply_buf + size;
3161 ceph_decode_64_safe(&p, end, seq, out);
3162 ceph_decode_32_safe(&p, end, snap_count, out);
3163
3164 /*
3165 * Make sure the reported number of snapshot ids wouldn't go
3166 * beyond the end of our buffer. But before checking that,
3167 * make sure the computed size of the snapshot context we
3168 * allocate is representable in a size_t.
3169 */
3170 if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
3171 / sizeof (u64)) {
3172 ret = -EINVAL;
3173 goto out;
3174 }
3175 if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
3176 goto out;
3177
3178 size = sizeof (struct ceph_snap_context) +
3179 snap_count * sizeof (snapc->snaps[0]);
3180 snapc = kmalloc(size, GFP_KERNEL);
3181 if (!snapc) {
3182 ret = -ENOMEM;
3183 goto out;
3184 }
3185
3186 atomic_set(&snapc->nref, 1);
3187 snapc->seq = seq;
3188 snapc->num_snaps = snap_count;
3189 for (i = 0; i < snap_count; i++)
3190 snapc->snaps[i] = ceph_decode_64(&p);
3191
3192 rbd_dev->header.snapc = snapc;
3193
3194 dout(" snap context seq = %llu, snap_count = %u\n",
3195 (unsigned long long) seq, (unsigned int) snap_count);
3196
3197out:
3198 kfree(reply_buf);
3199
3200 return 0;
3201}
3202
b8b1e2db
AE
3203static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
3204{
3205 size_t size;
3206 void *reply_buf;
3207 __le64 snap_id;
3208 int ret;
3209 void *p;
3210 void *end;
b8b1e2db
AE
3211 char *snap_name;
3212
3213 size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
3214 reply_buf = kmalloc(size, GFP_KERNEL);
3215 if (!reply_buf)
3216 return ERR_PTR(-ENOMEM);
3217
3218 snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
36be9a76 3219 ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
b8b1e2db
AE
3220 "rbd", "get_snapshot_name",
3221 (char *) &snap_id, sizeof (snap_id),
07b2391f 3222 reply_buf, size, NULL);
36be9a76 3223 dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
b8b1e2db
AE
3224 if (ret < 0)
3225 goto out;
3226
3227 p = reply_buf;
3228 end = (char *) reply_buf + size;
e5c35534 3229 snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
b8b1e2db
AE
3230 if (IS_ERR(snap_name)) {
3231 ret = PTR_ERR(snap_name);
3232 goto out;
3233 } else {
3234 dout(" snap_id 0x%016llx snap_name = %s\n",
3235 (unsigned long long) le64_to_cpu(snap_id), snap_name);
3236 }
3237 kfree(reply_buf);
3238
3239 return snap_name;
3240out:
3241 kfree(reply_buf);
3242
3243 return ERR_PTR(ret);
3244}
3245
3246static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
3247 u64 *snap_size, u64 *snap_features)
3248{
e0b49868 3249 u64 snap_id;
b8b1e2db
AE
3250 u8 order;
3251 int ret;
3252
3253 snap_id = rbd_dev->header.snapc->snaps[which];
3254 ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size);
3255 if (ret)
3256 return ERR_PTR(ret);
3257 ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features);
3258 if (ret)
3259 return ERR_PTR(ret);
3260
3261 return rbd_dev_v2_snap_name(rbd_dev, which);
3262}
3263
3264static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which,
3265 u64 *snap_size, u64 *snap_features)
3266{
3267 if (rbd_dev->image_format == 1)
3268 return rbd_dev_v1_snap_info(rbd_dev, which,
3269 snap_size, snap_features);
3270 if (rbd_dev->image_format == 2)
3271 return rbd_dev_v2_snap_info(rbd_dev, which,
3272 snap_size, snap_features);
3273 return ERR_PTR(-EINVAL);
3274}
3275
117973fb
AE
3276static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver)
3277{
3278 int ret;
3279 __u8 obj_order;
3280
3281 down_write(&rbd_dev->header_rwsem);
3282
3283 /* Grab old order first, to see if it changes */
3284
3285 obj_order = rbd_dev->header.obj_order,
3286 ret = rbd_dev_v2_image_size(rbd_dev);
3287 if (ret)
3288 goto out;
3289 if (rbd_dev->header.obj_order != obj_order) {
3290 ret = -EIO;
3291 goto out;
3292 }
3293 rbd_update_mapping_size(rbd_dev);
3294
3295 ret = rbd_dev_v2_snap_context(rbd_dev, hver);
3296 dout("rbd_dev_v2_snap_context returned %d\n", ret);
3297 if (ret)
3298 goto out;
3299 ret = rbd_dev_snaps_update(rbd_dev);
3300 dout("rbd_dev_snaps_update returned %d\n", ret);
3301 if (ret)
3302 goto out;
3303 ret = rbd_dev_snaps_register(rbd_dev);
3304 dout("rbd_dev_snaps_register returned %d\n", ret);
3305out:
3306 up_write(&rbd_dev->header_rwsem);
3307
3308 return ret;
3309}
3310
dfc5606d 3311/*
35938150
AE
3312 * Scan the rbd device's current snapshot list and compare it to the
3313 * newly-received snapshot context. Remove any existing snapshots
3314 * not present in the new snapshot context. Add a new snapshot for
3315 * any snaphots in the snapshot context not in the current list.
3316 * And verify there are no changes to snapshots we already know
3317 * about.
3318 *
3319 * Assumes the snapshots in the snapshot context are sorted by
3320 * snapshot id, highest id first. (Snapshots in the rbd_dev's list
3321 * are also maintained in that order.)
dfc5606d 3322 */
304f6808 3323static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
dfc5606d 3324{
35938150
AE
3325 struct ceph_snap_context *snapc = rbd_dev->header.snapc;
3326 const u32 snap_count = snapc->num_snaps;
35938150
AE
3327 struct list_head *head = &rbd_dev->snaps;
3328 struct list_head *links = head->next;
3329 u32 index = 0;
dfc5606d 3330
9fcbb800 3331 dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count);
35938150
AE
3332 while (index < snap_count || links != head) {
3333 u64 snap_id;
3334 struct rbd_snap *snap;
cd892126
AE
3335 char *snap_name;
3336 u64 snap_size = 0;
3337 u64 snap_features = 0;
dfc5606d 3338
35938150
AE
3339 snap_id = index < snap_count ? snapc->snaps[index]
3340 : CEPH_NOSNAP;
3341 snap = links != head ? list_entry(links, struct rbd_snap, node)
3342 : NULL;
aafb230e 3343 rbd_assert(!snap || snap->id != CEPH_NOSNAP);
dfc5606d 3344
35938150
AE
3345 if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
3346 struct list_head *next = links->next;
dfc5606d 3347
6d292906
AE
3348 /*
3349 * A previously-existing snapshot is not in
3350 * the new snap context.
3351 *
3352 * If the now missing snapshot is the one the
3353 * image is mapped to, clear its exists flag
3354 * so we can avoid sending any more requests
3355 * to it.
3356 */
0d7dbfce 3357 if (rbd_dev->spec->snap_id == snap->id)
6d292906 3358 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
41f38c2b 3359 rbd_remove_snap_dev(snap);
9fcbb800 3360 dout("%ssnap id %llu has been removed\n",
0d7dbfce
AE
3361 rbd_dev->spec->snap_id == snap->id ?
3362 "mapped " : "",
9fcbb800 3363 (unsigned long long) snap->id);
35938150
AE
3364
3365 /* Done with this list entry; advance */
3366
3367 links = next;
dfc5606d
YS
3368 continue;
3369 }
35938150 3370
b8b1e2db
AE
3371 snap_name = rbd_dev_snap_info(rbd_dev, index,
3372 &snap_size, &snap_features);
cd892126
AE
3373 if (IS_ERR(snap_name))
3374 return PTR_ERR(snap_name);
3375
9fcbb800
AE
3376 dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count,
3377 (unsigned long long) snap_id);
35938150
AE
3378 if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) {
3379 struct rbd_snap *new_snap;
3380
3381 /* We haven't seen this snapshot before */
3382
c8d18425 3383 new_snap = __rbd_add_snap_dev(rbd_dev, snap_name,
cd892126 3384 snap_id, snap_size, snap_features);
9fcbb800
AE
3385 if (IS_ERR(new_snap)) {
3386 int err = PTR_ERR(new_snap);
3387
3388 dout(" failed to add dev, error %d\n", err);
3389
3390 return err;
3391 }
35938150
AE
3392
3393 /* New goes before existing, or at end of list */
3394
9fcbb800 3395 dout(" added dev%s\n", snap ? "" : " at end\n");
35938150
AE
3396 if (snap)
3397 list_add_tail(&new_snap->node, &snap->node);
3398 else
523f3258 3399 list_add_tail(&new_snap->node, head);
35938150
AE
3400 } else {
3401 /* Already have this one */
3402
9fcbb800
AE
3403 dout(" already present\n");
3404
cd892126 3405 rbd_assert(snap->size == snap_size);
aafb230e 3406 rbd_assert(!strcmp(snap->name, snap_name));
cd892126 3407 rbd_assert(snap->features == snap_features);
35938150
AE
3408
3409 /* Done with this list entry; advance */
3410
3411 links = links->next;
dfc5606d 3412 }
35938150
AE
3413
3414 /* Advance to the next entry in the snapshot context */
3415
3416 index++;
dfc5606d 3417 }
9fcbb800 3418 dout("%s: done\n", __func__);
dfc5606d
YS
3419
3420 return 0;
3421}
3422
304f6808
AE
3423/*
3424 * Scan the list of snapshots and register the devices for any that
3425 * have not already been registered.
3426 */
3427static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
3428{
3429 struct rbd_snap *snap;
3430 int ret = 0;
3431
37206ee5 3432 dout("%s:\n", __func__);
86ff77bb
AE
3433 if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
3434 return -EIO;
304f6808
AE
3435
3436 list_for_each_entry(snap, &rbd_dev->snaps, node) {
3437 if (!rbd_snap_registered(snap)) {
3438 ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
3439 if (ret < 0)
3440 break;
3441 }
3442 }
3443 dout("%s: returning %d\n", __func__, ret);
3444
3445 return ret;
3446}
3447
dfc5606d
YS
3448static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
3449{
dfc5606d 3450 struct device *dev;
cd789ab9 3451 int ret;
dfc5606d
YS
3452
3453 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
dfc5606d 3454
cd789ab9 3455 dev = &rbd_dev->dev;
dfc5606d
YS
3456 dev->bus = &rbd_bus_type;
3457 dev->type = &rbd_device_type;
3458 dev->parent = &rbd_root_dev;
3459 dev->release = rbd_dev_release;
de71a297 3460 dev_set_name(dev, "%d", rbd_dev->dev_id);
dfc5606d 3461 ret = device_register(dev);
dfc5606d 3462
dfc5606d 3463 mutex_unlock(&ctl_mutex);
cd789ab9 3464
dfc5606d 3465 return ret;
602adf40
YS
3466}
3467
dfc5606d
YS
/*
 * Remove the rbd device from the bus/sysfs.  Final rbd_dev cleanup
 * presumably runs via the release callback set in rbd_bus_add_dev()
 * once the last reference is dropped — standard driver-model flow.
 */
static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
{
	device_unregister(&rbd_dev->dev);
}
3472
/* Highest device id handed out so far; ids start at 1 */
static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);

/*
 * Get a unique rbd identifier for the given new rbd_dev, and add
 * the rbd_dev to the global list.  The minimum rbd id is 1.
 */
static void rbd_dev_id_get(struct rbd_device *rbd_dev)
{
	/* atomic64_inc_return() never yields 0 here, so ids begin at 1 */
	rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max);

	/* rbd_dev_list_lock guards the global device list */
	spin_lock(&rbd_dev_list_lock);
	list_add_tail(&rbd_dev->node, &rbd_dev_list);
	spin_unlock(&rbd_dev_list_lock);
	dout("rbd_dev %p given dev id %llu\n", rbd_dev,
		(unsigned long long) rbd_dev->dev_id);
}
b7f23c36 3489
/*
 * Remove an rbd_dev from the global list, and record that its
 * identifier is no longer in use.
 */
static void rbd_dev_id_put(struct rbd_device *rbd_dev)
{
	struct list_head *tmp;
	/* NOTE(review): dev_id is printed as %llu elsewhere but held in
	 * an int here — confirm ids cannot exceed INT_MAX. */
	int rbd_id = rbd_dev->dev_id;
	int max_id;

	rbd_assert(rbd_id > 0);

	dout("rbd_dev %p released dev id %llu\n", rbd_dev,
		(unsigned long long) rbd_dev->dev_id);
	spin_lock(&rbd_dev_list_lock);
	list_del_init(&rbd_dev->node);

	/*
	 * If the id being "put" is not the current maximum, there
	 * is nothing special we need to do.
	 */
	if (rbd_id != atomic64_read(&rbd_dev_id_max)) {
		spin_unlock(&rbd_dev_list_lock);
		return;
	}

	/*
	 * We need to update the current maximum id.  Search the
	 * list to find out what it is.  We're more likely to find
	 * the maximum at the end, so search the list backward.
	 */
	max_id = 0;
	list_for_each_prev(tmp, &rbd_dev_list) {
		struct rbd_device *rbd_dev;

		rbd_dev = list_entry(tmp, struct rbd_device, node);
		if (rbd_dev->dev_id > max_id)
			max_id = rbd_dev->dev_id;
	}
	spin_unlock(&rbd_dev_list_lock);

	/*
	 * The max id could have been updated by rbd_dev_id_get(), in
	 * which case it now accurately reflects the new maximum.
	 * Be careful not to overwrite the maximum value in that
	 * case.
	 */
	atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id);
	dout(" max dev id has been reset\n");
}
3540
e28fff26
AE
/*
 * Skips over white space at *buf, and updates *buf to point to the
 * first found non-space character (if any).  Returns the length of
 * the token (string of non-white space characters) found.  Note
 * that *buf must be terminated with '\0'.
 */
static inline size_t next_token(const char **buf)
{
	/* Characters for which isspace() is nonzero in C/POSIX locales */
	static const char spaces[] = " \f\n\r\t\v";
	size_t skip;

	skip = strspn(*buf, spaces);	/* leading white space */
	*buf += skip;

	return strcspn(*buf, spaces);	/* length of the token */
}
3559
/*
 * Finds the next token in *buf, and if the provided token buffer is
 * big enough, copies the found token into it.  The result, if
 * copied, is guaranteed to be terminated with '\0'.  Note that *buf
 * must be terminated with '\0' on entry.
 *
 * Returns the length of the token found (not including the '\0').
 * Return value will be 0 if no token is found, and it will be >=
 * token_size if the token would not fit (nothing is copied then).
 *
 * The *buf pointer will be updated to point beyond the end of the
 * found token, even when the token buffer is too small to hold it.
 */
static inline size_t copy_token(const char **buf,
				char *token,
				size_t token_size)
{
	size_t len = next_token(buf);

	if (len < token_size) {
		memcpy(token, *buf, len);
		token[len] = '\0';
	}
	*buf += len;

	return len;
}
3589
ea3352f4
AE
3590/*
3591 * Finds the next token in *buf, dynamically allocates a buffer big
3592 * enough to hold a copy of it, and copies the token into the new
3593 * buffer. The copy is guaranteed to be terminated with '\0'. Note
3594 * that a duplicate buffer is created even for a zero-length token.
3595 *
3596 * Returns a pointer to the newly-allocated duplicate, or a null
3597 * pointer if memory for the duplicate was not available. If
3598 * the lenp argument is a non-null pointer, the length of the token
3599 * (not including the '\0') is returned in *lenp.
3600 *
3601 * If successful, the *buf pointer will be updated to point beyond
3602 * the end of the found token.
3603 *
3604 * Note: uses GFP_KERNEL for allocation.
3605 */
3606static inline char *dup_token(const char **buf, size_t *lenp)
3607{
3608 char *dup;
3609 size_t len;
3610
3611 len = next_token(buf);
4caf35f9 3612 dup = kmemdup(*buf, len + 1, GFP_KERNEL);
ea3352f4
AE
3613 if (!dup)
3614 return NULL;
ea3352f4
AE
3615 *(dup + len) = '\0';
3616 *buf += len;
3617
3618 if (lenp)
3619 *lenp = len;
3620
3621 return dup;
3622}
3623
/*
 * Parse the options provided for an "rbd add" (i.e., rbd image
 * mapping) request.  These arrive via a write to /sys/bus/rbd/add,
 * and the data written is passed here via a NUL-terminated buffer.
 * Returns 0 if successful or an error code otherwise.
 *
 * The information extracted from these options is recorded in
 * the other parameters which return dynamically-allocated
 * structures:
 *  ceph_opts
 *      The address of a pointer that will refer to a ceph options
 *      structure.  Caller must release the returned pointer using
 *      ceph_destroy_options() when it is no longer needed.
 *  rbd_opts
 *	Address of an rbd options pointer.  Fully initialized by
 *	this function; caller must release with kfree().
 *  spec
 *	Address of an rbd image specification pointer.  Fully
 *	initialized by this function based on parsed options.
 *	Caller must release with rbd_spec_put().
 *
 * The options passed take this form:
 *  <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
 * where:
 *  <mon_addrs>
 *      A comma-separated list of one or more monitor addresses.
 *      A monitor address is an ip address, optionally followed
 *      by a port number (separated by a colon).
 *        I.e.:  ip1[:port1][,ip2[:port2]...]
 *  <options>
 *      A comma-separated list of ceph and/or rbd options.
 *  <pool_name>
 *      The name of the rados pool containing the rbd image.
 *  <image_name>
 *      The name of the image in that pool to map.
 *  <snap_id>
 *      An optional snapshot id.  If provided, the mapping will
 *      present data from the image at the time that snapshot was
 *      created.  The image head is used if no snapshot id is
 *      provided.  Snapshot mappings are always read-only.
 */
static int rbd_add_parse_args(const char *buf,
				struct ceph_options **ceph_opts,
				struct rbd_options **opts,
				struct rbd_spec **rbd_spec)
{
	size_t len;
	char *options;
	const char *mon_addrs;
	size_t mon_addrs_size;
	struct rbd_spec *spec = NULL;
	struct rbd_options *rbd_opts = NULL;
	struct ceph_options *copts;
	int ret;

	/* The first four tokens are required */

	len = next_token(&buf);
	if (!len) {
		rbd_warn(NULL, "no monitor address(es) provided");
		return -EINVAL;
	}
	/* mon_addrs is not copied; it points into the caller's buffer */
	mon_addrs = buf;
	mon_addrs_size = len + 1;
	buf += len;

	/* Default error for the out_err paths below */
	ret = -EINVAL;
	options = dup_token(&buf, NULL);
	if (!options)
		return -ENOMEM;
	if (!*options) {
		rbd_warn(NULL, "no options provided");
		goto out_err;
	}

	spec = rbd_spec_alloc();
	if (!spec)
		goto out_mem;

	spec->pool_name = dup_token(&buf, NULL);
	if (!spec->pool_name)
		goto out_mem;
	if (!*spec->pool_name) {
		rbd_warn(NULL, "no pool name provided");
		goto out_err;
	}

	spec->image_name = dup_token(&buf, NULL);
	if (!spec->image_name)
		goto out_mem;
	if (!*spec->image_name) {
		rbd_warn(NULL, "no image name provided");
		goto out_err;
	}

	/*
	 * Snapshot name is optional; default is to use "-"
	 * (indicating the head/no snapshot).
	 */
	len = next_token(&buf);
	if (!len) {
		buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
		len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
	} else if (len > RBD_MAX_SNAP_NAME_LEN) {
		ret = -ENAMETOOLONG;
		goto out_err;
	}
	/* len + 1 bytes duplicated; last byte forced to NUL below */
	spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
	if (!spec->snap_name)
		goto out_mem;
	*(spec->snap_name + len) = '\0';

	/* Initialize all rbd options to the defaults */

	rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
	if (!rbd_opts)
		goto out_mem;

	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;

	copts = ceph_parse_options(options, mon_addrs,
					mon_addrs + mon_addrs_size - 1,
					parse_rbd_opts_token, rbd_opts);
	if (IS_ERR(copts)) {
		ret = PTR_ERR(copts);
		goto out_err;
	}
	/* Success: options string no longer needed; ownership of
	 * copts/rbd_opts/spec passes to the caller. */
	kfree(options);

	*ceph_opts = copts;
	*opts = rbd_opts;
	*rbd_spec = spec;

	return 0;
out_mem:
	ret = -ENOMEM;
out_err:
	kfree(rbd_opts);
	rbd_spec_put(spec);
	kfree(options);

	return ret;
}
3767
589d30e0
AE
/*
 * An rbd format 2 image has a unique identifier, distinct from the
 * name given to it by the user.  Internally, that identifier is
 * what's used to specify the names of objects related to the image.
 *
 * A special "rbd id" object is used to map an rbd image name to its
 * id.  If that object doesn't exist, then there is no v2 rbd image
 * with the supplied name.
 *
 * This function will record the given rbd_dev's image_id field if
 * it can be determined, and in that case will return 0.  If any
 * errors occur a negative errno will be returned and the rbd_dev's
 * image_id field will be unchanged (and should be NULL).
 */
static int rbd_dev_image_id(struct rbd_device *rbd_dev)
{
	int ret;
	size_t size;
	char *object_name;
	void *response;
	void *p;

	/*
	 * When probing a parent image, the image id is already
	 * known (and the image name likely is not).  There's no
	 * need to fetch the image id again in this case.
	 */
	if (rbd_dev->spec->image_id)
		return 0;

	/*
	 * First, see if the format 2 image id file exists, and if
	 * so, get the image's persistent id from it.
	 */
	/* sizeof (RBD_ID_PREFIX) includes the terminating NUL */
	size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
	object_name = kmalloc(size, GFP_NOIO);
	if (!object_name)
		return -ENOMEM;
	sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
	dout("rbd id object name is %s\n", object_name);

	/* Response will be an encoded string, which includes a length */

	size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
	response = kzalloc(size, GFP_NOIO);
	if (!response) {
		ret = -ENOMEM;
		goto out;
	}

	ret = rbd_obj_method_sync(rbd_dev, object_name,
				"rbd", "get_id",
				NULL, 0,
				response, RBD_IMAGE_ID_LEN_MAX, NULL);
	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
	if (ret < 0)
		goto out;

	/*
	 * NOTE(review): on success, ret retains whatever non-negative
	 * value rbd_obj_method_sync() returned.  The contract above
	 * promises 0 — confirm the callee returns 0 (not a byte
	 * count), since rbd_dev_probe() treats any nonzero return as
	 * "not a format 2 image".
	 */
	p = response;
	rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
					p + RBD_IMAGE_ID_LEN_MAX,
					NULL, GFP_NOIO);
	if (IS_ERR(rbd_dev->spec->image_id)) {
		ret = PTR_ERR(rbd_dev->spec->image_id);
		rbd_dev->spec->image_id = NULL;
	} else {
		dout("image_id is %s\n", rbd_dev->spec->image_id);
	}
out:
	kfree(response);
	kfree(object_name);

	return ret;
}
3842
a30b71b9
AE
/*
 * Probe a format 1 ("old format") image: record an empty image id,
 * build the header object name (<image_name> + RBD_SUFFIX), and read
 * the on-disk header into rbd_dev->header.
 *
 * Returns 0 on success; on error, any allocations made here are
 * released and the relevant fields reset to NULL.
 */
static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
{
	int ret;
	size_t size;

	/* Version 1 images have no id; empty string is used */

	rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
	if (!rbd_dev->spec->image_id)
		return -ENOMEM;

	/* Record the header object name for this rbd image. */

	/* sizeof (RBD_SUFFIX) accounts for the terminating NUL */
	size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX);
	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
	if (!rbd_dev->header_name) {
		ret = -ENOMEM;
		goto out_err;
	}
	sprintf(rbd_dev->header_name, "%s%s",
		rbd_dev->spec->image_name, RBD_SUFFIX);

	/* Populate rbd image metadata */

	ret = rbd_read_header(rbd_dev, &rbd_dev->header);
	if (ret < 0)
		goto out_err;

	/* Version 1 images have no parent (no layering) */

	rbd_dev->parent_spec = NULL;
	rbd_dev->parent_overlap = 0;

	rbd_dev->image_format = 1;

	dout("discovered version 1 image, header name is %s\n",
		rbd_dev->header_name);

	return 0;

out_err:
	kfree(rbd_dev->header_name);
	rbd_dev->header_name = NULL;
	kfree(rbd_dev->spec->image_id);
	rbd_dev->spec->image_id = NULL;

	return ret;
}
3891
/*
 * Probe a format 2 image.  The image id was already determined by
 * the caller (rbd_dev_image_id()); this builds the header object
 * name (RBD_HEADER_PREFIX + image_id) and then fetches the image's
 * size/order, object prefix, features, optional parent (layering)
 * info, and snapshot context.
 *
 * Returns 0 on success; on error everything allocated or referenced
 * here is released.
 */
static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
{
	size_t size;
	int ret;
	u64 ver = 0;

	/*
	 * Image id was filled in by the caller.  Record the header
	 * object name for this rbd image.
	 */
	size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id);
	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
	if (!rbd_dev->header_name)
		return -ENOMEM;
	sprintf(rbd_dev->header_name, "%s%s",
		RBD_HEADER_PREFIX, rbd_dev->spec->image_id);

	/* Get the size and object order for the image */

	ret = rbd_dev_v2_image_size(rbd_dev);
	if (ret < 0)
		goto out_err;

	/* Get the object prefix (a.k.a. block_name) for the image */

	ret = rbd_dev_v2_object_prefix(rbd_dev);
	if (ret < 0)
		goto out_err;

	/* Get the and check features for the image */

	ret = rbd_dev_v2_features(rbd_dev);
	if (ret < 0)
		goto out_err;

	/* If the image supports layering, get the parent info */

	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
		ret = rbd_dev_v2_parent_info(rbd_dev);
		if (ret < 0)
			goto out_err;
	}

	/* crypto and compression type aren't (yet) supported for v2 images */

	rbd_dev->header.crypt_type = 0;
	rbd_dev->header.comp_type = 0;

	/* Get the snapshot context, plus the header version */

	ret = rbd_dev_v2_snap_context(rbd_dev, &ver);
	if (ret)
		goto out_err;
	rbd_dev->header.obj_version = ver;

	rbd_dev->image_format = 2;

	dout("discovered version 2 image, header name is %s\n",
		rbd_dev->header_name);

	return 0;
out_err:
	/* Safe even for steps not reached, assuming these fields start
	 * NULL/0 — kfree(NULL) and rbd_spec_put(NULL) are no-ops. */
	rbd_dev->parent_overlap = 0;
	rbd_spec_put(rbd_dev->parent_spec);
	rbd_dev->parent_spec = NULL;
	kfree(rbd_dev->header_name);
	rbd_dev->header_name = NULL;
	kfree(rbd_dev->header.object_prefix);
	rbd_dev->header.object_prefix = NULL;

	return ret;
}
3964
83a06263
AE
/*
 * Finish setting up a freshly probed rbd device: populate the
 * snapshot list and name fields, assign a device id, register the
 * block device and sysfs entries, arm the header watch, and finally
 * announce the disk.
 *
 * Error handling is a staged unwind: once rbd_bus_add_dev() has
 * succeeded, cleanup responsibility shifts to the sysfs/release
 * path, so later failures only call rbd_bus_del_dev().
 */
static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
{
	int ret;

	/* no need to lock here, as rbd_dev is not registered yet */
	ret = rbd_dev_snaps_update(rbd_dev);
	if (ret)
		return ret;

	ret = rbd_dev_probe_update_spec(rbd_dev);
	if (ret)
		goto err_out_snaps;

	ret = rbd_dev_set_mapping(rbd_dev);
	if (ret)
		goto err_out_snaps;

	/* generate unique id: find highest unique id, add one */
	rbd_dev_id_get(rbd_dev);

	/* Fill in the device name, now that we have its id. */
	BUILD_BUG_ON(DEV_NAME_LEN
			< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);

	/* Get our block major device number. */

	/* register_blkdev(0, ...) dynamically allocates a major */
	ret = register_blkdev(0, rbd_dev->name);
	if (ret < 0)
		goto err_out_id;
	rbd_dev->major = ret;

	/* Set up the blkdev mapping. */

	ret = rbd_init_disk(rbd_dev);
	if (ret)
		goto err_out_blkdev;

	ret = rbd_bus_add_dev(rbd_dev);
	if (ret)
		goto err_out_disk;

	/*
	 * At this point cleanup in the event of an error is the job
	 * of the sysfs code (initiated by rbd_bus_del_dev()).
	 */
	down_write(&rbd_dev->header_rwsem);
	ret = rbd_dev_snaps_register(rbd_dev);
	up_write(&rbd_dev->header_rwsem);
	if (ret)
		goto err_out_bus;

	ret = rbd_dev_header_watch_sync(rbd_dev, 1);
	if (ret)
		goto err_out_bus;

	/* Everything's ready.  Announce the disk to the world. */

	add_disk(rbd_dev->disk);

	pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
		(unsigned long long) rbd_dev->mapping.size);

	return ret;
err_out_bus:
	/* this will also clean up rest of rbd_dev stuff */

	rbd_bus_del_dev(rbd_dev);

	return ret;
err_out_disk:
	rbd_free_disk(rbd_dev);
err_out_blkdev:
	unregister_blkdev(rbd_dev->major, rbd_dev->name);
err_out_id:
	rbd_dev_id_put(rbd_dev);
err_out_snaps:
	rbd_remove_all_snaps(rbd_dev);

	return ret;
}
4046
a30b71b9
AE
4047/*
4048 * Probe for the existence of the header object for the given rbd
4049 * device. For format 2 images this includes determining the image
4050 * id.
4051 */
4052static int rbd_dev_probe(struct rbd_device *rbd_dev)
4053{
4054 int ret;
4055
4056 /*
4057 * Get the id from the image id object. If it's not a
4058 * format 2 image, we'll get ENOENT back, and we'll assume
4059 * it's a format 1 image.
4060 */
4061 ret = rbd_dev_image_id(rbd_dev);
4062 if (ret)
4063 ret = rbd_dev_v1_probe(rbd_dev);
4064 else
4065 ret = rbd_dev_v2_probe(rbd_dev);
83a06263 4066 if (ret) {
a30b71b9
AE
4067 dout("probe failed, returning %d\n", ret);
4068
83a06263
AE
4069 return ret;
4070 }
4071
4072 ret = rbd_dev_probe_finish(rbd_dev);
4073 if (ret)
4074 rbd_header_free(&rbd_dev->header);
4075
a30b71b9
AE
4076 return ret;
4077}
4078
59c2be1e
YS
4079static ssize_t rbd_add(struct bus_type *bus,
4080 const char *buf,
4081 size_t count)
602adf40 4082{
cb8627c7 4083 struct rbd_device *rbd_dev = NULL;
dc79b113 4084 struct ceph_options *ceph_opts = NULL;
4e9afeba 4085 struct rbd_options *rbd_opts = NULL;
859c31df 4086 struct rbd_spec *spec = NULL;
9d3997fd 4087 struct rbd_client *rbdc;
27cc2594
AE
4088 struct ceph_osd_client *osdc;
4089 int rc = -ENOMEM;
602adf40
YS
4090
4091 if (!try_module_get(THIS_MODULE))
4092 return -ENODEV;
4093
602adf40 4094 /* parse add command */
859c31df 4095 rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
dc79b113 4096 if (rc < 0)
bd4ba655 4097 goto err_out_module;
78cea76e 4098
9d3997fd
AE
4099 rbdc = rbd_get_client(ceph_opts);
4100 if (IS_ERR(rbdc)) {
4101 rc = PTR_ERR(rbdc);
0ddebc0c 4102 goto err_out_args;
9d3997fd 4103 }
c53d5893 4104 ceph_opts = NULL; /* rbd_dev client now owns this */
602adf40 4105
602adf40 4106 /* pick the pool */
9d3997fd 4107 osdc = &rbdc->client->osdc;
859c31df 4108 rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
602adf40
YS
4109 if (rc < 0)
4110 goto err_out_client;
859c31df
AE
4111 spec->pool_id = (u64) rc;
4112
0903e875
AE
4113 /* The ceph file layout needs to fit pool id in 32 bits */
4114
4115 if (WARN_ON(spec->pool_id > (u64) U32_MAX)) {
4116 rc = -EIO;
4117 goto err_out_client;
4118 }
4119
c53d5893 4120 rbd_dev = rbd_dev_create(rbdc, spec);
bd4ba655
AE
4121 if (!rbd_dev)
4122 goto err_out_client;
c53d5893
AE
4123 rbdc = NULL; /* rbd_dev now owns this */
4124 spec = NULL; /* rbd_dev now owns this */
602adf40 4125
bd4ba655 4126 rbd_dev->mapping.read_only = rbd_opts->read_only;
c53d5893
AE
4127 kfree(rbd_opts);
4128 rbd_opts = NULL; /* done with this */
bd4ba655 4129
a30b71b9
AE
4130 rc = rbd_dev_probe(rbd_dev);
4131 if (rc < 0)
c53d5893 4132 goto err_out_rbd_dev;
05fd6f6f 4133
602adf40 4134 return count;
c53d5893
AE
4135err_out_rbd_dev:
4136 rbd_dev_destroy(rbd_dev);
bd4ba655 4137err_out_client:
9d3997fd 4138 rbd_put_client(rbdc);
0ddebc0c 4139err_out_args:
78cea76e
AE
4140 if (ceph_opts)
4141 ceph_destroy_options(ceph_opts);
4e9afeba 4142 kfree(rbd_opts);
859c31df 4143 rbd_spec_put(spec);
bd4ba655
AE
4144err_out_module:
4145 module_put(THIS_MODULE);
27cc2594 4146
602adf40 4147 dout("Error adding device %s\n", buf);
27cc2594
AE
4148
4149 return (ssize_t) rc;
602adf40
YS
4150}
4151
de71a297 4152static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
602adf40
YS
4153{
4154 struct list_head *tmp;
4155 struct rbd_device *rbd_dev;
4156
e124a82f 4157 spin_lock(&rbd_dev_list_lock);
602adf40
YS
4158 list_for_each(tmp, &rbd_dev_list) {
4159 rbd_dev = list_entry(tmp, struct rbd_device, node);
de71a297 4160 if (rbd_dev->dev_id == dev_id) {
e124a82f 4161 spin_unlock(&rbd_dev_list_lock);
602adf40 4162 return rbd_dev;
e124a82f 4163 }
602adf40 4164 }
e124a82f 4165 spin_unlock(&rbd_dev_list_lock);
602adf40
YS
4166 return NULL;
4167}
4168
dfc5606d 4169static void rbd_dev_release(struct device *dev)
602adf40 4170{
593a9e7b 4171 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
602adf40 4172
59c2be1e 4173 if (rbd_dev->watch_event)
9969ebc5 4174 rbd_dev_header_watch_sync(rbd_dev, 0);
602adf40
YS
4175
4176 /* clean up and free blkdev */
4177 rbd_free_disk(rbd_dev);
4178 unregister_blkdev(rbd_dev->major, rbd_dev->name);
32eec68d 4179
2ac4e75d
AE
4180 /* release allocated disk header fields */
4181 rbd_header_free(&rbd_dev->header);
4182
32eec68d 4183 /* done with the id, and with the rbd_dev */
e2839308 4184 rbd_dev_id_put(rbd_dev);
c53d5893
AE
4185 rbd_assert(rbd_dev->rbd_client != NULL);
4186 rbd_dev_destroy(rbd_dev);
602adf40
YS
4187
4188 /* release module ref */
4189 module_put(THIS_MODULE);
602adf40
YS
4190}
4191
dfc5606d
YS
4192static ssize_t rbd_remove(struct bus_type *bus,
4193 const char *buf,
4194 size_t count)
602adf40
YS
4195{
4196 struct rbd_device *rbd_dev = NULL;
4197 int target_id, rc;
4198 unsigned long ul;
4199 int ret = count;
4200
4201 rc = strict_strtoul(buf, 10, &ul);
4202 if (rc)
4203 return rc;
4204
4205 /* convert to int; abort if we lost anything in the conversion */
4206 target_id = (int) ul;
4207 if (target_id != ul)
4208 return -EINVAL;
4209
4210 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
4211
4212 rbd_dev = __rbd_get_dev(target_id);
4213 if (!rbd_dev) {
4214 ret = -ENOENT;
4215 goto done;
42382b70
AE
4216 }
4217
a14ea269 4218 spin_lock_irq(&rbd_dev->lock);
b82d167b 4219 if (rbd_dev->open_count)
42382b70 4220 ret = -EBUSY;
b82d167b
AE
4221 else
4222 set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
a14ea269 4223 spin_unlock_irq(&rbd_dev->lock);
b82d167b 4224 if (ret < 0)
42382b70 4225 goto done;
602adf40 4226
41f38c2b 4227 rbd_remove_all_snaps(rbd_dev);
dfc5606d 4228 rbd_bus_del_dev(rbd_dev);
602adf40
YS
4229
4230done:
4231 mutex_unlock(&ctl_mutex);
aafb230e 4232
602adf40
YS
4233 return ret;
4234}
4235
602adf40
YS
4236/*
4237 * create control files in sysfs
dfc5606d 4238 * /sys/bus/rbd/...
602adf40
YS
4239 */
4240static int rbd_sysfs_init(void)
4241{
dfc5606d 4242 int ret;
602adf40 4243
fed4c143 4244 ret = device_register(&rbd_root_dev);
21079786 4245 if (ret < 0)
dfc5606d 4246 return ret;
602adf40 4247
fed4c143
AE
4248 ret = bus_register(&rbd_bus_type);
4249 if (ret < 0)
4250 device_unregister(&rbd_root_dev);
602adf40 4251
602adf40
YS
4252 return ret;
4253}
4254
4255static void rbd_sysfs_cleanup(void)
4256{
dfc5606d 4257 bus_unregister(&rbd_bus_type);
fed4c143 4258 device_unregister(&rbd_root_dev);
602adf40
YS
4259}
4260
cc344fa1 4261static int __init rbd_init(void)
602adf40
YS
4262{
4263 int rc;
4264
1e32d34c
AE
4265 if (!libceph_compatible(NULL)) {
4266 rbd_warn(NULL, "libceph incompatibility (quitting)");
4267
4268 return -EINVAL;
4269 }
602adf40
YS
4270 rc = rbd_sysfs_init();
4271 if (rc)
4272 return rc;
f0f8cef5 4273 pr_info("loaded " RBD_DRV_NAME_LONG "\n");
602adf40
YS
4274 return 0;
4275}
4276
cc344fa1 4277static void __exit rbd_exit(void)
602adf40
YS
4278{
4279 rbd_sysfs_cleanup();
4280}
4281
4282module_init(rbd_init);
4283module_exit(rbd_exit);
4284
4285MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
4286MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
4287MODULE_DESCRIPTION("rados block device");
4288
4289/* following authorship retained from original osdblk.c */
4290MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
4291
4292MODULE_LICENSE("GPL");
This page took 0.379076 seconds and 5 git commands to generate.