Btrfs: update to use fs_state bit
[deliverable/linux.git] / fs / btrfs / disk-io.c
index 8e58a1f9054697fdf6a4866e4fb738afc97e9a98..127b23e8323b5ad6480ff792589ec63c07c483a3 100644 (file)
@@ -46,6 +46,7 @@
 #include "check-integrity.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
+#include "raid56.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -56,11 +57,12 @@ static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                                    int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+                                            struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_root *root);
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
 static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
 static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                        struct extent_io_tree *dirty_pages,
@@ -639,8 +641,15 @@ err:
                btree_readahead_hook(root, eb, eb->start, ret);
        }
 
-       if (ret)
+       if (ret) {
+               /*
+                * our io error hook is going to dec the io pages
+                * again, we have to make sure it has something
+                * to decrement
+                */
+               atomic_inc(&eb->io_pages);
                clear_extent_buffer_uptodate(eb);
+       }
        free_extent_buffer(eb);
 out:
        return ret;
@@ -654,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
        eb = (struct extent_buffer *)page->private;
        set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
        eb->read_mirror = failed_mirror;
+       atomic_dec(&eb->io_pages);
        if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
                btree_readahead_hook(root, eb, eb->start, -EIO);
        return -EIO;    /* we fixed nothing */
@@ -670,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->work.flags = 0;
 
        if (bio->bi_rw & REQ_WRITE) {
-               if (end_io_wq->metadata == 1)
+               if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
-               else if (end_io_wq->metadata == 2)
+               else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
                        btrfs_queue_worker(&fs_info->endio_freespace_worker,
                                           &end_io_wq->work);
+               else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
+                                          &end_io_wq->work);
                else
                        btrfs_queue_worker(&fs_info->endio_write_workers,
                                           &end_io_wq->work);
        } else {
-               if (end_io_wq->metadata)
+               if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
+                                          &end_io_wq->work);
+               else if (end_io_wq->metadata)
                        btrfs_queue_worker(&fs_info->endio_meta_workers,
                                           &end_io_wq->work);
                else
@@ -695,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
  * 0 - if data
  * 1 - if normal metadta
  * 2 - if writing to the free space cache area
+ * 3 - raid parity work
  */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        int metadata)
@@ -2029,7 +2046,6 @@ int open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
-       INIT_LIST_HEAD(&fs_info->ordered_operations);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->trans_lock);
@@ -2179,6 +2195,12 @@ int open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
 
+       ret = btrfs_alloc_stripe_hash_table(fs_info);
+       if (ret) {
+               err = ret;
+               goto fail_alloc;
+       }
+
        __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
@@ -2349,6 +2371,12 @@ int open_ctree(struct super_block *sb,
        btrfs_init_workers(&fs_info->endio_meta_write_workers,
                           "endio-meta-write", fs_info->thread_pool_size,
                           &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->endio_raid56_workers,
+                          "endio-raid56", fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->rmw_workers,
+                          "rmw", fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size,
                           &fs_info->generic_worker);
@@ -2367,6 +2395,8 @@ int open_ctree(struct super_block *sb,
         */
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_meta_workers.idle_thresh = 4;
+       fs_info->endio_raid56_workers.idle_thresh = 4;
+       fs_info->rmw_workers.idle_thresh = 2;
 
        fs_info->endio_write_workers.idle_thresh = 2;
        fs_info->endio_meta_write_workers.idle_thresh = 2;
@@ -2383,6 +2413,8 @@ int open_ctree(struct super_block *sb,
        ret |= btrfs_start_workers(&fs_info->fixup_workers);
        ret |= btrfs_start_workers(&fs_info->endio_workers);
        ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
+       ret |= btrfs_start_workers(&fs_info->rmw_workers);
+       ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
        ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
        ret |= btrfs_start_workers(&fs_info->endio_write_workers);
        ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
@@ -2407,8 +2439,7 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize = sectorsize;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-       if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
-                   sizeof(disk_super->magic))) {
+       if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
                printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2711,13 +2742,13 @@ fail_cleaner:
         * kthreads
         */
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
-       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 fail_block_groups:
        btrfs_free_block_groups(fs_info);
 
 fail_tree_roots:
        free_root_pointers(fs_info, 1);
+       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 fail_sb_buffer:
        btrfs_stop_workers(&fs_info->generic_worker);
@@ -2727,6 +2758,8 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
        btrfs_stop_workers(&fs_info->endio_meta_workers);
+       btrfs_stop_workers(&fs_info->endio_raid56_workers);
+       btrfs_stop_workers(&fs_info->rmw_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -2738,7 +2771,6 @@ fail_alloc:
 fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
-       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
 fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
@@ -2749,6 +2781,7 @@ fail_bdi:
 fail_srcu:
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+       btrfs_free_stripe_hash_table(fs_info);
        btrfs_close_devices(fs_info->fs_devices);
        return err;
 
@@ -2816,8 +2849,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
 
                super = (struct btrfs_super_block *)bh->b_data;
                if (btrfs_super_bytenr(super) != bytenr ||
-                   strncmp((char *)(&super->magic), BTRFS_MAGIC,
-                           sizeof(super->magic))) {
+                   super->magic != cpu_to_le64(BTRFS_MAGIC)) {
                        brelse(bh);
                        continue;
                }
@@ -3097,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
                                     ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
                                      == 0)))
                                        num_tolerated_disk_barrier_failures = 0;
-                               else if (num_tolerated_disk_barrier_failures > 1
-                                        &&
-                                        (flags & (BTRFS_BLOCK_GROUP_RAID1 |
-                                                  BTRFS_BLOCK_GROUP_RAID10)))
-                                       num_tolerated_disk_barrier_failures = 1;
+                               else if (num_tolerated_disk_barrier_failures > 1) {
+                                       /* RAID1/5/10 cap at 1, RAID6 at 2 */
+                                       if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+                                           BTRFS_BLOCK_GROUP_RAID5 |
+                                           BTRFS_BLOCK_GROUP_RAID10))
+                                               num_tolerated_disk_barrier_failures = 1;
+                                       else if (flags &
+                                                BTRFS_BLOCK_GROUP_RAID6)
+                                               num_tolerated_disk_barrier_failures = 2;
+                               }
                        }
                }
                up_read(&sinfo->groups_sem);
@@ -3216,6 +3253,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+               btrfs_free_log(NULL, root);
+               btrfs_free_log_root_tree(NULL, fs_info);
+       }
+
        __btrfs_remove_free_space_cache(root->free_ino_pinned);
        __btrfs_remove_free_space_cache(root->free_ino_ctl);
        free_fs_root(root);
@@ -3405,6 +3447,8 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
        btrfs_stop_workers(&fs_info->endio_meta_workers);
+       btrfs_stop_workers(&fs_info->endio_raid56_workers);
+       btrfs_stop_workers(&fs_info->rmw_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -3427,6 +3471,8 @@ int close_ctree(struct btrfs_root *root)
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
+       btrfs_free_stripe_hash_table(fs_info);
+
        return 0;
 }
 
@@ -3490,8 +3536,8 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
        ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
                                     BTRFS_DIRTY_METADATA_THRESH);
        if (ret > 0) {
-               balance_dirty_pages_ratelimited_nr(
-                                  root->fs_info->btree_inode->i_mapping, 1);
+               balance_dirty_pages_ratelimited(
+                                  root->fs_info->btree_inode->i_mapping);
        }
        return;
 }
@@ -3539,7 +3585,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
        btrfs_cleanup_transaction(root);
 }
 
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+                                            struct btrfs_root *root)
 {
        struct btrfs_inode *btrfs_inode;
        struct list_head splice;
@@ -3549,7 +3596,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
        mutex_lock(&root->fs_info->ordered_operations_mutex);
        spin_lock(&root->fs_info->ordered_extent_lock);
 
-       list_splice_init(&root->fs_info->ordered_operations, &splice);
+       list_splice_init(&t->ordered_operations, &splice);
        while (!list_empty(&splice)) {
                btrfs_inode = list_entry(splice.next, struct btrfs_inode,
                                         ordered_operations);
@@ -3565,35 +3612,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
 
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 {
-       struct list_head splice;
        struct btrfs_ordered_extent *ordered;
-       struct inode *inode;
-
-       INIT_LIST_HEAD(&splice);
 
        spin_lock(&root->fs_info->ordered_extent_lock);
-
-       list_splice_init(&root->fs_info->ordered_extents, &splice);
-       while (!list_empty(&splice)) {
-               ordered = list_entry(splice.next, struct btrfs_ordered_extent,
-                                    root_extent_list);
-
-               list_del_init(&ordered->root_extent_list);
-               atomic_inc(&ordered->refs);
-
-               /* the inode may be getting freed (in sys_unlink path). */
-               inode = igrab(ordered->inode);
-
-               spin_unlock(&root->fs_info->ordered_extent_lock);
-               if (inode)
-                       iput(inode);
-
-               atomic_set(&ordered->refs, 1);
-               btrfs_put_ordered_extent(ordered);
-
-               spin_lock(&root->fs_info->ordered_extent_lock);
-       }
-
+       /*
+        * This will just short circuit the ordered completion stuff which will
+        * make sure the ordered extent gets properly cleaned up.
+        */
+       list_for_each_entry(ordered, &root->fs_info->ordered_extents,
+                           root_extent_list)
+               set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
        spin_unlock(&root->fs_info->ordered_extent_lock);
 }
 
@@ -3615,11 +3643,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        }
 
        while ((node = rb_first(&delayed_refs->root)) != NULL) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+               struct btrfs_delayed_ref_head *head = NULL;
 
+               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                atomic_set(&ref->refs, 1);
                if (btrfs_delayed_ref_is_head(ref)) {
-                       struct btrfs_delayed_ref_head *head;
 
                        head = btrfs_delayed_node_to_head(ref);
                        if (!mutex_trylock(&head->mutex)) {
@@ -3641,10 +3669,12 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                delayed_refs->num_heads_ready--;
                        list_del_init(&head->cluster);
                }
+
                ref->in_tree = 0;
                rb_erase(&ref->rb_node, &delayed_refs->root);
                delayed_refs->num_entries--;
-
+               if (head)
+                       mutex_unlock(&head->mutex);
                spin_unlock(&delayed_refs->lock);
                btrfs_put_delayed_ref(ref);
 
@@ -3657,7 +3687,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        return ret;
 }
 
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
 {
        struct btrfs_pending_snapshot *snapshot;
        struct list_head splice;
@@ -3670,10 +3700,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
                snapshot = list_entry(splice.next,
                                      struct btrfs_pending_snapshot,
                                      list);
-
+               snapshot->error = -ECANCELED;
                list_del_init(&snapshot->list);
-
-               kfree(snapshot);
        }
 }
 
@@ -3810,6 +3838,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        cur_trans->blocked = 1;
        wake_up(&root->fs_info->transaction_blocked_wait);
 
+       btrfs_evict_pending_snapshots(cur_trans);
+
        cur_trans->blocked = 0;
        wake_up(&root->fs_info->transaction_wait);
 
@@ -3819,8 +3849,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        btrfs_destroy_delayed_inodes(root);
        btrfs_assert_delayed_root_empty(root);
 
-       btrfs_destroy_pending_snapshots(cur_trans);
-
        btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
                                     EXTENT_DIRTY);
        btrfs_destroy_pinned_extent(root,
@@ -3846,10 +3874,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 
        while (!list_empty(&list)) {
                t = list_entry(list.next, struct btrfs_transaction, list);
-               if (!t)
-                       break;
 
-               btrfs_destroy_ordered_operations(root);
+               btrfs_destroy_ordered_operations(t, root);
 
                btrfs_destroy_ordered_extents(root);
 
@@ -3866,6 +3892,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                        wake_up(&root->fs_info->transaction_blocked_wait);
 
+               btrfs_evict_pending_snapshots(t);
+
                t->blocked = 0;
                smp_mb();
                if (waitqueue_active(&root->fs_info->transaction_wait))
@@ -3879,8 +3907,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                btrfs_destroy_delayed_inodes(root);
                btrfs_assert_delayed_root_empty(root);
 
-               btrfs_destroy_pending_snapshots(t);
-
                btrfs_destroy_delalloc_inodes(root);
 
                spin_lock(&root->fs_info->trans_lock);
This page took 0.033823 seconds and 5 git commands to generate.