static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
conf_t *conf = data;
- int size = offsetof(struct r10bio_s, devs[conf->copies]);
+ int size = offsetof(struct r10bio, devs[conf->copies]);
/* allocate a r10bio with room for raid_disks entries in the bios array */
return kzalloc(size, gfp_flags);
{
conf_t *conf = data;
struct page *page;
- r10bio_t *r10_bio;
+ struct r10bio *r10_bio;
struct bio *bio;
int i, j;
int nalloc;
{
int i;
conf_t *conf = data;
- r10bio_t *r10bio = __r10_bio;
+ struct r10bio *r10bio = __r10_bio;
int j;
for (j=0; j < conf->copies; j++) {
r10bio_pool_free(r10bio, conf);
}
-static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
+static void put_all_bios(conf_t *conf, struct r10bio *r10_bio)
{
int i;
for (i = 0; i < conf->copies; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
- if (*bio && *bio != IO_BLOCKED)
+ if (!BIO_SPECIAL(*bio))
bio_put(*bio);
*bio = NULL;
}
}
-static void free_r10bio(r10bio_t *r10_bio)
+static void free_r10bio(struct r10bio *r10_bio)
{
conf_t *conf = r10_bio->mddev->private;
mempool_free(r10_bio, conf->r10bio_pool);
}
-static void put_buf(r10bio_t *r10_bio)
+static void put_buf(struct r10bio *r10_bio)
{
conf_t *conf = r10_bio->mddev->private;
lower_barrier(conf);
}
-static void reschedule_retry(r10bio_t *r10_bio)
+static void reschedule_retry(struct r10bio *r10_bio)
{
unsigned long flags;
- mddev_t *mddev = r10_bio->mddev;
+ struct mddev *mddev = r10_bio->mddev;
conf_t *conf = mddev->private;
spin_lock_irqsave(&conf->device_lock, flags);
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void raid_end_bio_io(r10bio_t *r10_bio)
+static void raid_end_bio_io(struct r10bio *r10_bio)
{
struct bio *bio = r10_bio->master_bio;
int done;
/*
* Update disk head position estimator based on IRQ completion info.
*/
-static inline void update_head_pos(int slot, r10bio_t *r10_bio)
+static inline void update_head_pos(int slot, struct r10bio *r10_bio)
{
conf_t *conf = r10_bio->mddev->private;
/*
* Find the disk number which triggered given bio
*/
-static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, struct bio *bio)
+static int find_bio_disk(conf_t *conf, struct r10bio *r10_bio,
+ struct bio *bio, int *slotp)
{
int slot;
BUG_ON(slot == conf->copies);
update_head_pos(slot, r10_bio);
+ if (slotp)
+ *slotp = slot;
return r10_bio->devs[slot].devnum;
}
static void raid10_end_read_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r10bio_t *r10_bio = bio->bi_private;
+ struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
conf_t *conf = r10_bio->mddev->private;
}
}
+static void close_write(struct r10bio *r10_bio)
+{
+ /* clear the bitmap if all writes complete successfully */
+ bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+ r10_bio->sectors,
+ !test_bit(R10BIO_Degraded, &r10_bio->state),
+ 0);
+ md_write_end(r10_bio->mddev);
+}
+
+static void one_write_done(struct r10bio *r10_bio)
+{
+ if (atomic_dec_and_test(&r10_bio->remaining)) {
+ if (test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else {
+ close_write(r10_bio);
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ raid_end_bio_io(r10_bio);
+ }
+ }
+}
+
static void raid10_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r10bio_t *r10_bio = bio->bi_private;
+ struct r10bio *r10_bio = bio->bi_private;
int dev;
+ int dec_rdev = 1;
conf_t *conf = r10_bio->mddev->private;
+ int slot;
- dev = find_bio_disk(conf, r10_bio, bio);
+ dev = find_bio_disk(conf, r10_bio, bio, &slot);
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
if (!uptodate) {
- md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
- /* an I/O failed, we can't clear the bitmap */
- set_bit(R10BIO_Degraded, &r10_bio->state);
- } else
+ set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ dec_rdev = 0;
+ } else {
/*
* Set R10BIO_Uptodate in our master bio, so that
* we will return a good error code for to the higher
* user-side. So if something waits for IO, then it will
* wait for the 'master' bio.
*/
+ sector_t first_bad;
+ int bad_sectors;
+
set_bit(R10BIO_Uptodate, &r10_bio->state);
+ /* Maybe we can clear some bad blocks. */
+ if (is_badblock(conf->mirrors[dev].rdev,
+ r10_bio->devs[slot].addr,
+ r10_bio->sectors,
+ &first_bad, &bad_sectors)) {
+ bio_put(bio);
+ r10_bio->devs[slot].bio = IO_MADE_GOOD;
+ dec_rdev = 0;
+ set_bit(R10BIO_MadeGood, &r10_bio->state);
+ }
+ }
+
/*
*
* Let's see if all mirrored write operations have finished
* already.
*/
- if (atomic_dec_and_test(&r10_bio->remaining)) {
- /* clear the bitmap if all writes complete successfully */
- bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
- r10_bio->sectors,
- !test_bit(R10BIO_Degraded, &r10_bio->state),
- 0);
- md_write_end(r10_bio->mddev);
- raid_end_bio_io(r10_bio);
- }
-
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+ one_write_done(r10_bio);
+ if (dec_rdev)
+ rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
}
* sector offset to a virtual address
*/
-static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio)
+static void raid10_find_phys(conf_t *conf, struct r10bio *r10bio)
{
int n,f;
sector_t sector;
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
- mddev_t *mddev = q->queuedata;
+ struct mddev *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_sectors;
* FIXME: possibly should rethink readbalancing and do it differently
* depending on near_copies / far_copies geometry.
*/
-static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors)
+static int read_balance(conf_t *conf, struct r10bio *r10_bio, int *max_sectors)
{
const sector_t this_sector = r10_bio->sector;
int disk, slot;
int sectors = r10_bio->sectors;
int best_good_sectors;
sector_t new_distance, best_dist;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int do_balance;
int best_slot;
static int raid10_congested(void *data, int bits)
{
- mddev_t *mddev = data;
+ struct mddev *mddev = data;
conf_t *conf = mddev->private;
int i, ret = 0;
return 1;
rcu_read_lock();
for (i = 0; i < conf->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev);
spin_unlock_irq(&conf->resync_lock);
}
-static int make_request(mddev_t *mddev, struct bio * bio)
+static int make_request(struct mddev *mddev, struct bio * bio)
{
conf_t *conf = mddev->private;
- mirror_info_t *mirror;
- r10bio_t *r10_bio;
+ struct mirror_info *mirror;
+ struct r10bio *r10_bio;
struct bio *read_bio;
int i;
int chunk_sects = conf->chunk_mask + 1;
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
unsigned long flags;
- mdk_rdev_t *blocked_rdev;
+ struct md_rdev *blocked_rdev;
int plugged;
int sectors_handled;
int max_sectors;
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- if (atomic_dec_and_test(&r10_bio->remaining)) {
- /* This matches the end of raid10_end_write_request() */
- bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
- r10_bio->sectors,
- !test_bit(R10BIO_Degraded, &r10_bio->state),
- 0);
- md_write_end(mddev);
- raid_end_bio_io(r10_bio);
- }
-
- /* In case raid10d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
+ /* Don't remove the bias on 'remaining' (one_write_done) until
+ * after checking if we need to go around again.
+ */
if (sectors_handled < (bio->bi_size >> 9)) {
- /* We need another r1_bio. It has already been counted
+ one_write_done(r10_bio);
+ /* We need another r10_bio. It has already been counted
* in bio->bi_phys_segments.
*/
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
r10_bio->state = 0;
goto retry_write;
}
+ one_write_done(r10_bio);
+
+ /* In case raid10d snuck in to freeze_array */
+ wake_up(&conf->wait_barrier);
if (do_sync || !mddev->bitmap || !plugged)
md_wakeup_thread(mddev->thread);
return 0;
}
-static void status(struct seq_file *seq, mddev_t *mddev)
+static void status(struct seq_file *seq, struct mddev *mddev)
{
conf_t *conf = mddev->private;
int i;
return 1;
}
-static void error(mddev_t *mddev, mdk_rdev_t *rdev)
+static void error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
conf_t *conf = mddev->private;
static void print_conf(conf_t *conf)
{
int i;
- mirror_info_t *tmp;
+ struct mirror_info *tmp;
printk(KERN_DEBUG "RAID10 conf printout:\n");
if (!conf) {
conf->r10buf_pool = NULL;
}
-static int raid10_spare_active(mddev_t *mddev)
+static int raid10_spare_active(struct mddev *mddev)
{
int i;
conf_t *conf = mddev->private;
- mirror_info_t *tmp;
+ struct mirror_info *tmp;
int count = 0;
unsigned long flags;
}
-static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
+static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{
conf_t *conf = mddev->private;
int err = -EEXIST;
else
mirror = first;
for ( ; mirror <= last ; mirror++) {
- mirror_info_t *p = &conf->mirrors[mirror];
+ struct mirror_info *p = &conf->mirrors[mirror];
if (p->recovery_disabled == mddev->recovery_disabled)
continue;
if (!p->rdev)
return err;
}
-static int raid10_remove_disk(mddev_t *mddev, int number)
+static int raid10_remove_disk(struct mddev *mddev, int number)
{
conf_t *conf = mddev->private;
int err = 0;
- mdk_rdev_t *rdev;
- mirror_info_t *p = conf->mirrors+ number;
+ struct md_rdev *rdev;
+ struct mirror_info *p = conf->mirrors+ number;
print_conf(conf);
rdev = p->rdev;
static void end_sync_read(struct bio *bio, int error)
{
- r10bio_t *r10_bio = bio->bi_private;
+ struct r10bio *r10_bio = bio->bi_private;
conf_t *conf = r10_bio->mddev->private;
int d;
- d = find_bio_disk(conf, r10_bio, bio);
+ d = find_bio_disk(conf, r10_bio, bio, NULL);
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
- else {
+ else
+ /* The write handler will notice the lack of
+ * R10BIO_Uptodate and record any errors etc
+ */
atomic_add(r10_bio->sectors,
&conf->mirrors[d].rdev->corrected_errors);
- if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
- md_error(r10_bio->mddev,
- conf->mirrors[d].rdev);
- }
/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
}
}
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_request(struct r10bio *r10_bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r10bio_t *r10_bio = bio->bi_private;
- mddev_t *mddev = r10_bio->mddev;
- conf_t *conf = mddev->private;
- int d;
+ struct mddev *mddev = r10_bio->mddev;
- d = find_bio_disk(conf, r10_bio, bio);
-
- if (!uptodate)
- md_error(mddev, conf->mirrors[d].rdev);
-
- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
while (atomic_dec_and_test(&r10_bio->remaining)) {
if (r10_bio->master_bio == NULL) {
/* the primary of several recovery bios */
sector_t s = r10_bio->sectors;
- put_buf(r10_bio);
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ put_buf(r10_bio);
md_done_sync(mddev, s, 1);
break;
} else {
- r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
- put_buf(r10_bio);
+ struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio;
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ put_buf(r10_bio);
r10_bio = r10_bio2;
}
}
}
+static void end_sync_write(struct bio *bio, int error)
+{
+ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct r10bio *r10_bio = bio->bi_private;
+ struct mddev *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ int d;
+ sector_t first_bad;
+ int bad_sectors;
+ int slot;
+
+ d = find_bio_disk(conf, r10_bio, bio, &slot);
+
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ } else if (is_badblock(conf->mirrors[d].rdev,
+ r10_bio->devs[slot].addr,
+ r10_bio->sectors,
+ &first_bad, &bad_sectors))
+ set_bit(R10BIO_MadeGood, &r10_bio->state);
+
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+
+ end_sync_request(r10_bio);
+}
+
/*
* Note: sync and recover and handled very differently for raid10
* This code is for resync.
* We check if all blocks are in-sync and only write to blocks that
* aren't in sync
*/
-static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
+static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
conf_t *conf = mddev->private;
int i, first;
if (j == vcnt)
continue;
mddev->resync_mismatches += r10_bio->sectors;
+ if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ /* Don't fix anything. */
+ continue;
}
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
- /* Don't fix anything. */
- continue;
- /* Ok, we need to write this bio
+ /* Ok, we need to write this bio, either to correct an
+ * inconsistency or to correct an unreadable block.
* First we need to fixup bv_offset, bv_len and
* bi_vecs, as the read request might have corrupted these
*/
* The second for writing.
*
*/
+static void fix_recovery_read_error(struct r10bio *r10_bio)
+{
+ /* We got a read error during recovery.
+ * We repeat the read in smaller page-sized sections.
+ * If a read succeeds, write it to the new device or record
+ * a bad block if we cannot.
+ * If a read fails, record a bad block on both old and
+ * new devices.
+ */
+ struct mddev *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ struct bio *bio = r10_bio->devs[0].bio;
+ sector_t sect = 0;
+ int sectors = r10_bio->sectors;
+ int idx = 0;
+ int dr = r10_bio->devs[0].devnum;
+ int dw = r10_bio->devs[1].devnum;
+
+ while (sectors) {
+ int s = sectors;
+ struct md_rdev *rdev;
+ sector_t addr;
+ int ok;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ rdev = conf->mirrors[dr].rdev;
+ addr = r10_bio->devs[0].addr + sect,
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ READ, false);
+ if (ok) {
+ rdev = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE, false);
+ if (!ok)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ }
+ if (!ok) {
+ /* We don't worry if we cannot set a bad block -
+ * it really is bad so there is no loss in not
+ * recording it yet
+ */
+ rdev_set_badblocks(rdev, addr, s, 0);
+
+ if (rdev != conf->mirrors[dw].rdev) {
+ /* need bad block on destination too */
+ struct md_rdev *rdev2 = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = rdev_set_badblocks(rdev2, addr, s, 0);
+ if (!ok) {
+ /* just abort the recovery */
+ printk(KERN_NOTICE
+ "md/raid10:%s: recovery aborted"
+ " due to read error\n",
+ mdname(mddev));
+
+ conf->mirrors[dw].recovery_disabled
+ = mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery);
+ break;
+ }
+ }
+ }
+
+ sectors -= s;
+ sect += s;
+ idx++;
+ }
+}
-static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
+static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
conf_t *conf = mddev->private;
int d;
struct bio *wbio;
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+ fix_recovery_read_error(r10_bio);
+ end_sync_request(r10_bio);
+ return;
+ }
+
/*
* share the pages with the first bio
* and submit the write request
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
- if (test_bit(R10BIO_Uptodate, &r10_bio->state))
- generic_make_request(wbio);
- else {
- printk(KERN_NOTICE
- "md/raid10:%s: recovery aborted due to read error\n",
- mdname(mddev));
- conf->mirrors[d].recovery_disabled = mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- bio_endio(wbio, 0);
- }
+ generic_make_request(wbio);
}
* since the last recorded read error.
*
*/
-static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
+static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
struct timespec cur_time_mon;
unsigned long hours_since_last;
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}
+static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+ && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ return -1;
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
/*
* This is a kernel thread which:
*
* 3. Performs writes following reads for array synchronising.
*/
-static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
+static void fix_read_error(conf_t *conf, struct mddev *mddev, struct r10bio *r10_bio)
{
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
- mdk_rdev_t*rdev;
+ struct md_rdev*rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
rcu_read_unlock();
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
+ /* Cannot read from anywhere, just mark the block
+ * as bad on the first device to discourage future
+ * reads.
+ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
- md_error(mddev, conf->mirrors[dn].rdev);
+ rdev = conf->mirrors[dn].rdev;
+
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[r10_bio->read_slot].addr
+ + sect,
+ s, 0))
+ md_error(mddev, rdev);
break;
}
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage, WRITE, false)
+ if (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
- md_error(mddev, rdev);
}
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage,
- READ, false) == 0) {
+ switch (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage,
+ READ)) {
+ case 0:
/* Well, this device is dead */
printk(KERN_NOTICE
"md/raid10:%s: unable to read back "
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
-
- md_error(mddev, rdev);
- } else {
+ break;
+ case 1:
printk(KERN_INFO
"md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",
}
}
-static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
+static void bi_complete(struct bio *bio, int error)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+ struct completion event;
+ rw |= REQ_SYNC;
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(rw, bio);
+ wait_for_completion(&event);
+
+ return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
+static int narrow_write_error(struct r10bio *r10_bio, int i)
+{
+ struct bio *bio = r10_bio->master_bio;
+ struct mddev *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
+ /* bio has the data to be written to slot 'i' where
+ * we just recently had a write error.
+ * We repeatedly clone the bio and trim down to one block,
+ * then try the write. Where the write fails we record
+ * a bad block.
+ * It is conceivable that the bio doesn't exactly align with
+ * blocks. We must handle this.
+ *
+ * We currently own a reference to the rdev.
+ */
+
+ int block_sectors;
+ sector_t sector;
+ int sectors;
+ int sect_to_write = r10_bio->sectors;
+ int ok = 1;
+
+ if (rdev->badblocks.shift < 0)
+ return 0;
+
+ block_sectors = 1 << rdev->badblocks.shift;
+ sector = r10_bio->sector;
+ sectors = ((r10_bio->sector + block_sectors)
+ & ~(sector_t)(block_sectors - 1))
+ - sector;
+
+ while (sect_to_write) {
+ struct bio *wbio;
+ if (sectors > sect_to_write)
+ sectors = sect_to_write;
+ /* Write at 'sector' for 'sectors' */
+ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(wbio, sector - bio->bi_sector, sectors);
+ wbio->bi_sector = (r10_bio->devs[i].addr+
+ rdev->data_offset+
+ (sector - r10_bio->sector));
+ wbio->bi_bdev = rdev->bdev;
+ if (submit_bio_wait(WRITE, wbio) == 0)
+ /* Failure! */
+ ok = rdev_set_badblocks(rdev, sector,
+ sectors, 0)
+ && ok;
+
+ bio_put(wbio);
+ sect_to_write -= sectors;
+ sector += sectors;
+ sectors = block_sectors;
+ }
+ return ok;
+}
+
+static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
{
int slot = r10_bio->read_slot;
int mirror = r10_bio->devs[slot].devnum;
struct bio *bio;
conf_t *conf = mddev->private;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
char b[BDEVNAME_SIZE];
unsigned long do_sync;
int max_sectors;
generic_make_request(bio);
}
-static void raid10d(mddev_t *mddev)
+static void handle_write_completed(conf_t *conf, struct r10bio *r10_bio)
+{
+ /* Some sort of write request has finished and it
+ * succeeded in writing where we thought there was a
+ * bad block. So forget the bad block.
+ * Or possibly if failed and we need to record
+ * a bad block.
+ */
+ int m;
+ struct md_rdev *rdev;
+
+ if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+ test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ rdev = conf->mirrors[dev].rdev;
+ if (r10_bio->devs[m].bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
+ &r10_bio->devs[m].bio->bi_flags)) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
+ }
+ }
+ put_buf(r10_bio);
+ } else {
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ struct bio *bio = r10_bio->devs[m].bio;
+ rdev = conf->mirrors[dev].rdev;
+ if (bio == IO_MADE_GOOD) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ rdev_dec_pending(rdev, conf->mddev);
+ } else if (bio != NULL &&
+ !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (!narrow_write_error(r10_bio, m)) {
+ md_error(conf->mddev, rdev);
+ set_bit(R10BIO_Degraded,
+ &r10_bio->state);
+ }
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ }
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
+ raid_end_bio_io(r10_bio);
+ }
+}
+
+static void raid10d(struct mddev *mddev)
{
- r10bio_t *r10_bio;
+ struct r10bio *r10_bio;
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
spin_unlock_irqrestore(&conf->device_lock, flags);
break;
}
- r10_bio = list_entry(head->prev, r10bio_t, retry_list);
+ r10_bio = list_entry(head->prev, struct r10bio, retry_list);
list_del(head->prev);
conf->nr_queued--;
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev = r10_bio->mddev;
conf = mddev->private;
- if (test_bit(R10BIO_IsSync, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ handle_write_completed(conf, r10_bio);
+ else if (test_bit(R10BIO_IsSync, &r10_bio->state))
sync_request_write(mddev, r10_bio);
else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
recovery_request_write(mddev, r10_bio);
*
*/
-static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
+static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
int *skipped, int go_faster)
{
conf_t *conf = mddev->private;
- r10bio_t *r10_bio;
+ struct r10bio *r10_bio;
struct bio *biolist = NULL, *bio;
sector_t max_sector, nr_sectors;
int i;
for (i=0 ; i<conf->raid_disks; i++) {
int still_degraded;
- r10bio_t *rb2;
+ struct r10bio *rb2;
sector_t sect;
int must_sync;
int any_working;
for (j=0; j<conf->copies;j++) {
int k;
int d = r10_bio->devs[j].devnum;
- mdk_rdev_t *rdev;
+ sector_t from_addr, to_addr;
+ struct md_rdev *rdev;
sector_t sector, first_bad;
int bad_sectors;
if (!conf->mirrors[d].rdev ||
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
- bio->bi_sector = r10_bio->devs[j].addr +
+ from_addr = r10_bio->devs[j].addr;
+ bio->bi_sector = from_addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
- bio->bi_sector = r10_bio->devs[k].addr +
+ to_addr = r10_bio->devs[k].addr;
+ bio->bi_sector = to_addr +
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
r10_bio->devs[0].devnum = d;
+ r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
+ r10_bio->devs[1].addr = to_addr;
break;
}
}
if (biolist == NULL) {
while (r10_bio) {
- r10bio_t *rb2 = r10_bio;
- r10_bio = (r10bio_t*) rb2->master_bio;
+ struct r10bio *rb2 = r10_bio;
+ r10_bio = (struct r10bio*) rb2->master_bio;
rb2->master_bio = NULL;
put_buf(rb2);
}
}
static sector_t
-raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
sector_t size;
conf_t *conf = mddev->private;
}
-static conf_t *setup_conf(mddev_t *mddev)
+static conf_t *setup_conf(struct mddev *mddev)
{
conf_t *conf = NULL;
int nc, fc, fo;
return ERR_PTR(err);
}
-static int run(mddev_t *mddev)
+static int run(struct mddev *mddev)
{
conf_t *conf;
int i, disk_idx, chunk_size;
- mirror_info_t *disk;
- mdk_rdev_t *rdev;
+ struct mirror_info *disk;
+ struct md_rdev *rdev;
sector_t size;
/*
return 0;
out_free_conf:
- md_unregister_thread(mddev->thread);
+ md_unregister_thread(&mddev->thread);
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
return -EIO;
}
-static int stop(mddev_t *mddev)
+static int stop(struct mddev *mddev)
{
conf_t *conf = mddev->private;
raise_barrier(conf, 0);
lower_barrier(conf);
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
return 0;
}
-static void raid10_quiesce(mddev_t *mddev, int state)
+static void raid10_quiesce(struct mddev *mddev, int state)
{
conf_t *conf = mddev->private;
}
}
-static void *raid10_takeover_raid0(mddev_t *mddev)
+static void *raid10_takeover_raid0(struct mddev *mddev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
conf_t *conf;
if (mddev->degraded > 0) {
return conf;
}
-static void *raid10_takeover(mddev_t *mddev)
+static void *raid10_takeover(struct mddev *mddev)
{
- struct raid0_private_data *raid0_priv;
+ struct r0conf *raid0_conf;
/* raid10 can take over:
* raid0 - providing it has only two drives
*/
if (mddev->level == 0) {
/* for raid0 takeover only one zone is supported */
- raid0_priv = mddev->private;
- if (raid0_priv->nr_strip_zones > 1) {
+ raid0_conf = mddev->private;
+ if (raid0_conf->nr_strip_zones > 1) {
printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
" with more than one zone.\n",
mdname(mddev));