Merge branch 'nfs-for-2.6.37' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6

[deliverable/linux.git] / drivers / md / raid1.c
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 0b830bbe1d8b6323bac02106c2ccc4d806e1e390..378a25894c57755afa27ab4fe87fd46f74834a97 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error)
                 if (r1_bio->bios[mirror] == bio)
                         break;
  
-       if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
-               set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
-               set_bit(R1BIO_BarrierRetry, &r1_bio->state);
-               r1_bio->mddev->barriers_work = 0;
-               /* Don't rdev_dec_pending in this branch - keep it for the retry */
-       } else {
+       /*
+        * 'one mirror IO has finished' event handler:
+        */
+       r1_bio->bios[mirror] = NULL;
+       to_put = bio;
+       if (!uptodate) {
+               md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+               /* an I/O failed, we can't clear the bitmap */
+               set_bit(R1BIO_Degraded, &r1_bio->state);
+       } else
                 /*
-                * this branch is our 'one mirror IO has finished' event handler:
+                * Set R1BIO_Uptodate in our master bio, so that we
+                * will return a good error code to the higher
+                * levels even if IO on some other mirrored buffer
+                * fails.
+                *
+                * The 'master' represents the composite IO operation
+                * to user-side. So if something waits for IO, then it
+                * will wait for the 'master' bio.
                  */
-               r1_bio->bios[mirror] = NULL;
-               to_put = bio;
-               if (!uptodate) {
-                       md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-                       /* an I/O failed, we can't clear the bitmap */
-                       set_bit(R1BIO_Degraded, &r1_bio->state);
-               } else
-                       /*
-                        * Set R1BIO_Uptodate in our master bio, so that
-                        * we will return a good error code for to the higher
-                        * levels even if IO on some other mirrored buffer fails.
-                        *
-                        * The 'master' represents the composite IO operation to
-                        * user-side. So if something waits for IO, then it will
-                        * wait for the 'master' bio.
-                        */
-                       set_bit(R1BIO_Uptodate, &r1_bio->state);
-
-               update_head_pos(mirror, r1_bio);
-
-               if (behind) {
-                       if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
-                               atomic_dec(&r1_bio->behind_remaining);
-
-                       /* In behind mode, we ACK the master bio once the I/O has safely
-                        * reached all non-writemostly disks. Setting the Returned bit
-                        * ensures that this gets done only once -- we don't ever want to
-                        * return -EIO here, instead we'll wait */
-
-                       if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
-                           test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-                               /* Maybe we can return now */
-                               if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
-                                       struct bio *mbio = r1_bio->master_bio;
-                                       PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
-                                              (unsigned long long) mbio->bi_sector,
-                                              (unsigned long long) mbio->bi_sector +
-                                              (mbio->bi_size >> 9) - 1);
-                                       bio_endio(mbio, 0);
-                               }
+               set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+       update_head_pos(mirror, r1_bio);
+
+       if (behind) {
+               if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+                       atomic_dec(&r1_bio->behind_remaining);
+
+               /*
+                * In behind mode, we ACK the master bio once the I/O
+                * has safely reached all non-writemostly
+                * disks. Setting the Returned bit ensures that this
+                * gets done only once -- we don't ever want to return
+                * -EIO here, instead we'll wait
+                */
+               if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
+                   test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+                       /* Maybe we can return now */
+                       if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
+                               struct bio *mbio = r1_bio->master_bio;
+                               PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
+                                      (unsigned long long) mbio->bi_sector,
+                                      (unsigned long long) mbio->bi_sector +
+                                      (mbio->bi_size >> 9) - 1);
+                               bio_endio(mbio, 0);
                         }
                 }
-               rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
         }
+       rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+
         /*
-        *
          * Let's see if all mirrored write operations have finished
          * already.
          */
         if (atomic_dec_and_test(&r1_bio->remaining)) {
-               if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
-                       reschedule_retry(r1_bio);
-               else {
-                       /* it really is the end of this request */
-                       if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-                               /* free extra copy of the data pages */
-                               int i = bio->bi_vcnt;
-                               while (i--)
-                                       safe_put_page(bio->bi_io_vec[i].bv_page);
-                       }
-                       /* clear the bitmap if all writes complete successfully */
-                       bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-                                       r1_bio->sectors,
-                                       !test_bit(R1BIO_Degraded, &r1_bio->state),
-                                       behind);
-                       md_write_end(r1_bio->mddev);
-                       raid_end_bio_io(r1_bio);
+               if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+                       /* free extra copy of the data pages */
+                       int i = bio->bi_vcnt;
+                       while (i--)
+                               safe_put_page(bio->bi_io_vec[i].bv_page);
                 }
+               /* clear the bitmap if all writes complete successfully */
+               bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+                               r1_bio->sectors,
+                               !test_bit(R1BIO_Degraded, &r1_bio->state),
+                               behind);
+               md_write_end(r1_bio->mddev);
+               raid_end_bio_io(r1_bio);
         }
  
         if (to_put)
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
         struct page **behind_pages = NULL;
         const int rw = bio_data_dir(bio);
         const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
-       unsigned long do_barriers;
+       const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
         mdk_rdev_t *blocked_rdev;
  
         /*
          * Register the new request and wait if the reconstruction
          * thread has put up a bar for new requests.
          * Continue immediately if no resync is active currently.
-        * We test barriers_work *after* md_write_start as md_write_start
-        * may cause the first superblock write, and that will check out
-        * if barriers work.
          */
  
         md_write_start(mddev, bio); /* wait on superblock update early */
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                 }
                 finish_wait(&conf->wait_barrier, &w);
         }
-       if (unlikely(!mddev->barriers_work &&
-                    (bio->bi_rw & REQ_HARDBARRIER))) {
-               if (rw == WRITE)
-                       md_write_end(mddev);
-               bio_endio(bio, -EOPNOTSUPP);
-               return 0;
-       }
  
         wait_barrier(conf);
  
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
         atomic_set(&r1_bio->remaining, 0);
         atomic_set(&r1_bio->behind_remaining, 0);
  
-       do_barriers = bio->bi_rw & REQ_HARDBARRIER;
-       if (do_barriers)
-               set_bit(R1BIO_Barrier, &r1_bio->state);
-
         bio_list_init(&bl);
         for (i = 0; i < disks; i++) {
                 struct bio *mbio;
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
                 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
                 mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_rw = WRITE | do_barriers | do_sync;
+               mbio->bi_rw = WRITE | do_flush_fua | do_sync;
                 mbio->bi_private = r1_bio;
  
                 if (behind_pages) {
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev)
                 if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
                         sync_request_write(mddev, r1_bio);
                         unplug = 1;
-               } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
-                       /* some requests in the r1bio were REQ_HARDBARRIER
-                        * requests which failed with -EOPNOTSUPP.  Hohumm..
-                        * Better resubmit without the barrier.
-                        * We know which devices to resubmit for, because
-                        * all others have had their bios[] entry cleared.
-                        * We already have a nr_pending reference on these rdevs.
-                        */
-                       int i;
-                       const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
-                       clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
-                       clear_bit(R1BIO_Barrier, &r1_bio->state);
-                       for (i=0; i < conf->raid_disks; i++)
-                               if (r1_bio->bios[i])
-                                       atomic_inc(&r1_bio->remaining);
-                       for (i=0; i < conf->raid_disks; i++)
-                               if (r1_bio->bios[i]) {
-                                       struct bio_vec *bvec;
-                                       int j;
-
-                                       bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
-                                       /* copy pages from the failed bio, as
-                                        * this might be a write-behind device */
-                                       __bio_for_each_segment(bvec, bio, j, 0)
-                                               bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
-                                       bio_put(r1_bio->bios[i]);
-                                       bio->bi_sector = r1_bio->sector +
-                                               conf->mirrors[i].rdev->data_offset;
-                                       bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-                                       bio->bi_end_io = raid1_end_write_request;
-                                       bio->bi_rw = WRITE | do_sync;
-                                       bio->bi_private = r1_bio;
-                                       r1_bio->bios[i] = bio;
-                                       generic_make_request(bio);
-                               }
                 } else {
                         int disk;