| 1 | /* |
| 2 | * bio-integrity.c - bio data integrity extensions |
| 3 | * |
| 4 | * Copyright (C) 2007, 2008, 2009 Oracle Corporation |
| 5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License version |
| 9 | * 2 as published by the Free Software Foundation. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, but |
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; see the file COPYING. If not, write to |
| 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, |
| 19 | * USA. |
| 20 | * |
| 21 | */ |
| 22 | |
| 23 | #include <linux/blkdev.h> |
| 24 | #include <linux/mempool.h> |
| 25 | #include <linux/export.h> |
| 26 | #include <linux/bio.h> |
| 27 | #include <linux/workqueue.h> |
| 28 | #include <linux/slab.h> |
| 29 | #include "blk.h" |
| 30 | |
/* Number of bio_vecs embedded in every payload allocated from bip_slab */
#define BIP_INLINE_VECS	4

/* Workqueue on which deferred integrity verification work is queued */
static struct workqueue_struct *kintegrityd_wq;
/* Slab cache backing the per-bio_set integrity payload mempools */
static struct kmem_cache *bip_slab;
| 35 | |
| 36 | void blk_flush_integrity(void) |
| 37 | { |
| 38 | flush_workqueue(kintegrityd_wq); |
| 39 | } |
| 40 | |
| 41 | /** |
| 42 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio |
| 43 | * @bio: bio to attach integrity metadata to |
| 44 | * @gfp_mask: Memory allocation mask |
| 45 | * @nr_vecs: Number of integrity metadata scatter-gather elements |
| 46 | * |
| 47 | * Description: This function prepares a bio for attaching integrity |
| 48 | * metadata. nr_vecs specifies the maximum number of pages containing |
| 49 | * integrity metadata that can be attached. |
| 50 | */ |
| 51 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, |
| 52 | gfp_t gfp_mask, |
| 53 | unsigned int nr_vecs) |
| 54 | { |
| 55 | struct bio_integrity_payload *bip; |
| 56 | struct bio_set *bs = bio->bi_pool; |
| 57 | unsigned inline_vecs; |
| 58 | |
| 59 | if (!bs || !bs->bio_integrity_pool) { |
| 60 | bip = kmalloc(sizeof(struct bio_integrity_payload) + |
| 61 | sizeof(struct bio_vec) * nr_vecs, gfp_mask); |
| 62 | inline_vecs = nr_vecs; |
| 63 | } else { |
| 64 | bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); |
| 65 | inline_vecs = BIP_INLINE_VECS; |
| 66 | } |
| 67 | |
| 68 | if (unlikely(!bip)) |
| 69 | return ERR_PTR(-ENOMEM); |
| 70 | |
| 71 | memset(bip, 0, sizeof(*bip)); |
| 72 | |
| 73 | if (nr_vecs > inline_vecs) { |
| 74 | unsigned long idx = 0; |
| 75 | |
| 76 | bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, |
| 77 | bs->bvec_integrity_pool); |
| 78 | if (!bip->bip_vec) |
| 79 | goto err; |
| 80 | bip->bip_max_vcnt = bvec_nr_vecs(idx); |
| 81 | bip->bip_slab = idx; |
| 82 | } else { |
| 83 | bip->bip_vec = bip->bip_inline_vecs; |
| 84 | bip->bip_max_vcnt = inline_vecs; |
| 85 | } |
| 86 | |
| 87 | bip->bip_bio = bio; |
| 88 | bio->bi_integrity = bip; |
| 89 | bio->bi_opf |= REQ_INTEGRITY; |
| 90 | |
| 91 | return bip; |
| 92 | err: |
| 93 | mempool_free(bip, bs->bio_integrity_pool); |
| 94 | return ERR_PTR(-ENOMEM); |
| 95 | } |
| 96 | EXPORT_SYMBOL(bio_integrity_alloc); |
| 97 | |
| 98 | /** |
| 99 | * bio_integrity_free - Free bio integrity payload |
| 100 | * @bio: bio containing bip to be freed |
| 101 | * |
| 102 | * Description: Used to free the integrity portion of a bio. Usually |
| 103 | * called from bio_free(). |
| 104 | */ |
| 105 | void bio_integrity_free(struct bio *bio) |
| 106 | { |
| 107 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 108 | struct bio_set *bs = bio->bi_pool; |
| 109 | |
| 110 | if (bip->bip_flags & BIP_BLOCK_INTEGRITY) |
| 111 | kfree(page_address(bip->bip_vec->bv_page) + |
| 112 | bip->bip_vec->bv_offset); |
| 113 | |
| 114 | if (bs && bs->bio_integrity_pool) { |
| 115 | bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); |
| 116 | |
| 117 | mempool_free(bip, bs->bio_integrity_pool); |
| 118 | } else { |
| 119 | kfree(bip); |
| 120 | } |
| 121 | |
| 122 | bio->bi_integrity = NULL; |
| 123 | } |
| 124 | EXPORT_SYMBOL(bio_integrity_free); |
| 125 | |
| 126 | /** |
| 127 | * bio_integrity_add_page - Attach integrity metadata |
| 128 | * @bio: bio to update |
| 129 | * @page: page containing integrity metadata |
| 130 | * @len: number of bytes of integrity metadata in page |
| 131 | * @offset: start offset within page |
| 132 | * |
| 133 | * Description: Attach a page containing integrity metadata to bio. |
| 134 | */ |
| 135 | int bio_integrity_add_page(struct bio *bio, struct page *page, |
| 136 | unsigned int len, unsigned int offset) |
| 137 | { |
| 138 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 139 | struct bio_vec *iv; |
| 140 | |
| 141 | if (bip->bip_vcnt >= bip->bip_max_vcnt) { |
| 142 | printk(KERN_ERR "%s: bip_vec full\n", __func__); |
| 143 | return 0; |
| 144 | } |
| 145 | |
| 146 | iv = bip->bip_vec + bip->bip_vcnt; |
| 147 | |
| 148 | if (bip->bip_vcnt && |
| 149 | bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), |
| 150 | &bip->bip_vec[bip->bip_vcnt - 1], offset)) |
| 151 | return 0; |
| 152 | |
| 153 | iv->bv_page = page; |
| 154 | iv->bv_len = len; |
| 155 | iv->bv_offset = offset; |
| 156 | bip->bip_vcnt++; |
| 157 | |
| 158 | return len; |
| 159 | } |
| 160 | EXPORT_SYMBOL(bio_integrity_add_page); |
| 161 | |
| 162 | /** |
| 163 | * bio_integrity_enabled - Check whether integrity can be passed |
| 164 | * @bio: bio to check |
| 165 | * |
| 166 | * Description: Determines whether bio_integrity_prep() can be called |
| 167 | * on this bio or not. bio data direction and target device must be |
| 168 | * set prior to calling. The functions honors the write_generate and |
| 169 | * read_verify flags in sysfs. |
| 170 | */ |
| 171 | bool bio_integrity_enabled(struct bio *bio) |
| 172 | { |
| 173 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 174 | |
| 175 | if (!bio_is_rw(bio)) |
| 176 | return false; |
| 177 | |
| 178 | /* Already protected? */ |
| 179 | if (bio_integrity(bio)) |
| 180 | return false; |
| 181 | |
| 182 | if (bi == NULL) |
| 183 | return false; |
| 184 | |
| 185 | if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL && |
| 186 | (bi->flags & BLK_INTEGRITY_VERIFY)) |
| 187 | return true; |
| 188 | |
| 189 | if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL && |
| 190 | (bi->flags & BLK_INTEGRITY_GENERATE)) |
| 191 | return true; |
| 192 | |
| 193 | return false; |
| 194 | } |
| 195 | EXPORT_SYMBOL(bio_integrity_enabled); |
| 196 | |
| 197 | /** |
| 198 | * bio_integrity_intervals - Return number of integrity intervals for a bio |
| 199 | * @bi: blk_integrity profile for device |
| 200 | * @sectors: Size of the bio in 512-byte sectors |
| 201 | * |
| 202 | * Description: The block layer calculates everything in 512 byte |
| 203 | * sectors but integrity metadata is done in terms of the data integrity |
| 204 | * interval size of the storage device. Convert the block layer sectors |
| 205 | * to the appropriate number of integrity intervals. |
| 206 | */ |
| 207 | static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, |
| 208 | unsigned int sectors) |
| 209 | { |
| 210 | return sectors >> (bi->interval_exp - 9); |
| 211 | } |
| 212 | |
| 213 | static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, |
| 214 | unsigned int sectors) |
| 215 | { |
| 216 | return bio_integrity_intervals(bi, sectors) * bi->tuple_size; |
| 217 | } |
| 218 | |
| 219 | /** |
| 220 | * bio_integrity_process - Process integrity metadata for a bio |
| 221 | * @bio: bio to generate/verify integrity metadata for |
| 222 | * @proc_fn: Pointer to the relevant processing function |
| 223 | */ |
| 224 | static int bio_integrity_process(struct bio *bio, |
| 225 | integrity_processing_fn *proc_fn) |
| 226 | { |
| 227 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 228 | struct blk_integrity_iter iter; |
| 229 | struct bvec_iter bviter; |
| 230 | struct bio_vec bv; |
| 231 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 232 | unsigned int ret = 0; |
| 233 | void *prot_buf = page_address(bip->bip_vec->bv_page) + |
| 234 | bip->bip_vec->bv_offset; |
| 235 | |
| 236 | iter.disk_name = bio->bi_bdev->bd_disk->disk_name; |
| 237 | iter.interval = 1 << bi->interval_exp; |
| 238 | iter.seed = bip_get_seed(bip); |
| 239 | iter.prot_buf = prot_buf; |
| 240 | |
| 241 | bio_for_each_segment(bv, bio, bviter) { |
| 242 | void *kaddr = kmap_atomic(bv.bv_page); |
| 243 | |
| 244 | iter.data_buf = kaddr + bv.bv_offset; |
| 245 | iter.data_size = bv.bv_len; |
| 246 | |
| 247 | ret = proc_fn(&iter); |
| 248 | if (ret) { |
| 249 | kunmap_atomic(kaddr); |
| 250 | return ret; |
| 251 | } |
| 252 | |
| 253 | kunmap_atomic(kaddr); |
| 254 | } |
| 255 | return ret; |
| 256 | } |
| 257 | |
| 258 | /** |
| 259 | * bio_integrity_prep - Prepare bio for integrity I/O |
| 260 | * @bio: bio to prepare |
| 261 | * |
| 262 | * Description: Allocates a buffer for integrity metadata, maps the |
| 263 | * pages and attaches them to a bio. The bio must have data |
| 264 | * direction, target device and start sector set priot to calling. In |
| 265 | * the WRITE case, integrity metadata will be generated using the |
| 266 | * block device's integrity function. In the READ case, the buffer |
| 267 | * will be prepared for DMA and a suitable end_io handler set up. |
| 268 | */ |
| 269 | int bio_integrity_prep(struct bio *bio) |
| 270 | { |
| 271 | struct bio_integrity_payload *bip; |
| 272 | struct blk_integrity *bi; |
| 273 | struct request_queue *q; |
| 274 | void *buf; |
| 275 | unsigned long start, end; |
| 276 | unsigned int len, nr_pages; |
| 277 | unsigned int bytes, offset, i; |
| 278 | unsigned int intervals; |
| 279 | |
| 280 | bi = bdev_get_integrity(bio->bi_bdev); |
| 281 | q = bdev_get_queue(bio->bi_bdev); |
| 282 | BUG_ON(bi == NULL); |
| 283 | BUG_ON(bio_integrity(bio)); |
| 284 | |
| 285 | intervals = bio_integrity_intervals(bi, bio_sectors(bio)); |
| 286 | |
| 287 | /* Allocate kernel buffer for protection data */ |
| 288 | len = intervals * bi->tuple_size; |
| 289 | buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); |
| 290 | if (unlikely(buf == NULL)) { |
| 291 | printk(KERN_ERR "could not allocate integrity buffer\n"); |
| 292 | return -ENOMEM; |
| 293 | } |
| 294 | |
| 295 | end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 296 | start = ((unsigned long) buf) >> PAGE_SHIFT; |
| 297 | nr_pages = end - start; |
| 298 | |
| 299 | /* Allocate bio integrity payload and integrity vectors */ |
| 300 | bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); |
| 301 | if (IS_ERR(bip)) { |
| 302 | printk(KERN_ERR "could not allocate data integrity bioset\n"); |
| 303 | kfree(buf); |
| 304 | return PTR_ERR(bip); |
| 305 | } |
| 306 | |
| 307 | bip->bip_flags |= BIP_BLOCK_INTEGRITY; |
| 308 | bip->bip_iter.bi_size = len; |
| 309 | bip_set_seed(bip, bio->bi_iter.bi_sector); |
| 310 | |
| 311 | if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM) |
| 312 | bip->bip_flags |= BIP_IP_CHECKSUM; |
| 313 | |
| 314 | /* Map it */ |
| 315 | offset = offset_in_page(buf); |
| 316 | for (i = 0 ; i < nr_pages ; i++) { |
| 317 | int ret; |
| 318 | bytes = PAGE_SIZE - offset; |
| 319 | |
| 320 | if (len <= 0) |
| 321 | break; |
| 322 | |
| 323 | if (bytes > len) |
| 324 | bytes = len; |
| 325 | |
| 326 | ret = bio_integrity_add_page(bio, virt_to_page(buf), |
| 327 | bytes, offset); |
| 328 | |
| 329 | if (ret == 0) |
| 330 | return 0; |
| 331 | |
| 332 | if (ret < bytes) |
| 333 | break; |
| 334 | |
| 335 | buf += bytes; |
| 336 | len -= bytes; |
| 337 | offset = 0; |
| 338 | } |
| 339 | |
| 340 | /* Install custom I/O completion handler if read verify is enabled */ |
| 341 | if (bio_data_dir(bio) == READ) { |
| 342 | bip->bip_end_io = bio->bi_end_io; |
| 343 | bio->bi_end_io = bio_integrity_endio; |
| 344 | } |
| 345 | |
| 346 | /* Auto-generate integrity metadata if this is a write */ |
| 347 | if (bio_data_dir(bio) == WRITE) |
| 348 | bio_integrity_process(bio, bi->profile->generate_fn); |
| 349 | |
| 350 | return 0; |
| 351 | } |
| 352 | EXPORT_SYMBOL(bio_integrity_prep); |
| 353 | |
| 354 | /** |
| 355 | * bio_integrity_verify_fn - Integrity I/O completion worker |
| 356 | * @work: Work struct stored in bio to be verified |
| 357 | * |
| 358 | * Description: This workqueue function is called to complete a READ |
| 359 | * request. The function verifies the transferred integrity metadata |
| 360 | * and then calls the original bio end_io function. |
| 361 | */ |
| 362 | static void bio_integrity_verify_fn(struct work_struct *work) |
| 363 | { |
| 364 | struct bio_integrity_payload *bip = |
| 365 | container_of(work, struct bio_integrity_payload, bip_work); |
| 366 | struct bio *bio = bip->bip_bio; |
| 367 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 368 | |
| 369 | bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); |
| 370 | |
| 371 | /* Restore original bio completion handler */ |
| 372 | bio->bi_end_io = bip->bip_end_io; |
| 373 | bio_endio(bio); |
| 374 | } |
| 375 | |
| 376 | /** |
| 377 | * bio_integrity_endio - Integrity I/O completion function |
| 378 | * @bio: Protected bio |
| 379 | * @error: Pointer to errno |
| 380 | * |
| 381 | * Description: Completion for integrity I/O |
| 382 | * |
| 383 | * Normally I/O completion is done in interrupt context. However, |
| 384 | * verifying I/O integrity is a time-consuming task which must be run |
| 385 | * in process context. This function postpones completion |
| 386 | * accordingly. |
| 387 | */ |
| 388 | void bio_integrity_endio(struct bio *bio) |
| 389 | { |
| 390 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 391 | |
| 392 | BUG_ON(bip->bip_bio != bio); |
| 393 | |
| 394 | /* In case of an I/O error there is no point in verifying the |
| 395 | * integrity metadata. Restore original bio end_io handler |
| 396 | * and run it. |
| 397 | */ |
| 398 | if (bio->bi_error) { |
| 399 | bio->bi_end_io = bip->bip_end_io; |
| 400 | bio_endio(bio); |
| 401 | |
| 402 | return; |
| 403 | } |
| 404 | |
| 405 | INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); |
| 406 | queue_work(kintegrityd_wq, &bip->bip_work); |
| 407 | } |
| 408 | EXPORT_SYMBOL(bio_integrity_endio); |
| 409 | |
| 410 | /** |
| 411 | * bio_integrity_advance - Advance integrity vector |
| 412 | * @bio: bio whose integrity vector to update |
| 413 | * @bytes_done: number of data bytes that have been completed |
| 414 | * |
| 415 | * Description: This function calculates how many integrity bytes the |
| 416 | * number of completed data bytes correspond to and advances the |
| 417 | * integrity vector accordingly. |
| 418 | */ |
| 419 | void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) |
| 420 | { |
| 421 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 422 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 423 | unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); |
| 424 | |
| 425 | bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); |
| 426 | } |
| 427 | EXPORT_SYMBOL(bio_integrity_advance); |
| 428 | |
| 429 | /** |
| 430 | * bio_integrity_trim - Trim integrity vector |
| 431 | * @bio: bio whose integrity vector to update |
| 432 | * @offset: offset to first data sector |
| 433 | * @sectors: number of data sectors |
| 434 | * |
| 435 | * Description: Used to trim the integrity vector in a cloned bio. |
| 436 | * The ivec will be advanced corresponding to 'offset' data sectors |
| 437 | * and the length will be truncated corresponding to 'len' data |
| 438 | * sectors. |
| 439 | */ |
| 440 | void bio_integrity_trim(struct bio *bio, unsigned int offset, |
| 441 | unsigned int sectors) |
| 442 | { |
| 443 | struct bio_integrity_payload *bip = bio_integrity(bio); |
| 444 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
| 445 | |
| 446 | bio_integrity_advance(bio, offset << 9); |
| 447 | bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors); |
| 448 | } |
| 449 | EXPORT_SYMBOL(bio_integrity_trim); |
| 450 | |
| 451 | /** |
| 452 | * bio_integrity_clone - Callback for cloning bios with integrity metadata |
| 453 | * @bio: New bio |
| 454 | * @bio_src: Original bio |
| 455 | * @gfp_mask: Memory allocation mask |
| 456 | * |
| 457 | * Description: Called to allocate a bip when cloning a bio |
| 458 | */ |
| 459 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
| 460 | gfp_t gfp_mask) |
| 461 | { |
| 462 | struct bio_integrity_payload *bip_src = bio_integrity(bio_src); |
| 463 | struct bio_integrity_payload *bip; |
| 464 | |
| 465 | BUG_ON(bip_src == NULL); |
| 466 | |
| 467 | bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); |
| 468 | if (IS_ERR(bip)) |
| 469 | return PTR_ERR(bip); |
| 470 | |
| 471 | memcpy(bip->bip_vec, bip_src->bip_vec, |
| 472 | bip_src->bip_vcnt * sizeof(struct bio_vec)); |
| 473 | |
| 474 | bip->bip_vcnt = bip_src->bip_vcnt; |
| 475 | bip->bip_iter = bip_src->bip_iter; |
| 476 | |
| 477 | return 0; |
| 478 | } |
| 479 | EXPORT_SYMBOL(bio_integrity_clone); |
| 480 | |
| 481 | int bioset_integrity_create(struct bio_set *bs, int pool_size) |
| 482 | { |
| 483 | if (bs->bio_integrity_pool) |
| 484 | return 0; |
| 485 | |
| 486 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); |
| 487 | if (!bs->bio_integrity_pool) |
| 488 | return -1; |
| 489 | |
| 490 | bs->bvec_integrity_pool = biovec_create_pool(pool_size); |
| 491 | if (!bs->bvec_integrity_pool) { |
| 492 | mempool_destroy(bs->bio_integrity_pool); |
| 493 | return -1; |
| 494 | } |
| 495 | |
| 496 | return 0; |
| 497 | } |
| 498 | EXPORT_SYMBOL(bioset_integrity_create); |
| 499 | |
| 500 | void bioset_integrity_free(struct bio_set *bs) |
| 501 | { |
| 502 | if (bs->bio_integrity_pool) |
| 503 | mempool_destroy(bs->bio_integrity_pool); |
| 504 | |
| 505 | if (bs->bvec_integrity_pool) |
| 506 | mempool_destroy(bs->bvec_integrity_pool); |
| 507 | } |
| 508 | EXPORT_SYMBOL(bioset_integrity_free); |
| 509 | |
| 510 | void __init bio_integrity_init(void) |
| 511 | { |
| 512 | /* |
| 513 | * kintegrityd won't block much but may burn a lot of CPU cycles. |
| 514 | * Make it highpri CPU intensive wq with max concurrency of 1. |
| 515 | */ |
| 516 | kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM | |
| 517 | WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1); |
| 518 | if (!kintegrityd_wq) |
| 519 | panic("Failed to create kintegrityd\n"); |
| 520 | |
| 521 | bip_slab = kmem_cache_create("bio_integrity_payload", |
| 522 | sizeof(struct bio_integrity_payload) + |
| 523 | sizeof(struct bio_vec) * BIP_INLINE_VECS, |
| 524 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
| 525 | } |