Btrfs: fail to mount if we have problems reading the block groups
[deliverable/linux.git] / fs / btrfs / extent-tree.c
index e238a0cdac677c06b1774018bf549b03399da4c8..4c910359c807bc825a7049d5d8aaacf4ccd4bd13 100644 (file)
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
        return (cache->flags & bits) == bits;
 }
 
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+{
+       atomic_inc(&cache->count);
+}
+
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
+{
+       if (atomic_dec_and_test(&cache->count))
+               kfree(cache);
+}
+
 /*
  * this adds the block group to the fs_info rb tree for the block group
  * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                }
        }
        if (ret)
-               atomic_inc(&ret->count);
+               btrfs_get_block_group(ret);
        spin_unlock(&info->block_group_cache_lock);
 
        return ret;
@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
        int stripe_len;
        int i, nr, ret;
 
+       if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+               stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+               cache->bytes_super += stripe_len;
+               ret = add_excluded_extent(root, cache->key.objectid,
+                                         stripe_len);
+               BUG_ON(ret);
+       }
+
        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
                if (ret)
                        break;
 
-               if (extent_start == start) {
+               if (extent_start <= start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
@@ -399,6 +418,8 @@ err:
 
        put_caching_control(caching_ctl);
        atomic_dec(&block_group->space_info->caching_threads);
+       btrfs_put_block_group(block_group);
+
        return 0;
 }
 
@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
        up_write(&fs_info->extent_commit_sem);
 
        atomic_inc(&cache->space_info->caching_threads);
+       btrfs_get_block_group(cache);
 
        tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
                          cache->key.objectid);
@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
        return cache;
 }
 
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
-{
-       if (atomic_dec_and_test(&cache->count))
-               kfree(cache);
-}
-
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
 {
@@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
        if (node) {
                cache = rb_entry(node, struct btrfs_block_group_cache,
                                 cache_node);
-               atomic_inc(&cache->count);
+               btrfs_get_block_group(cache);
        } else
                cache = NULL;
        spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
        root = async->root;
        info = async->info;
 
-       btrfs_start_delalloc_inodes(root);
+       btrfs_start_delalloc_inodes(root, 0);
        wake_up(&info->flush_wait);
-       btrfs_wait_ordered_extents(root, 0);
+       btrfs_wait_ordered_extents(root, 0, 0);
 
        spin_lock(&info->lock);
        info->flushing = 0;
@@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
        return;
 
 flush:
-       btrfs_start_delalloc_inodes(root);
-       btrfs_wait_ordered_extents(root, 0);
+       btrfs_start_delalloc_inodes(root, 0);
+       btrfs_wait_ordered_extents(root, 0, 0);
 
        spin_lock(&info->lock);
        info->flushing = 0;
@@ -2977,10 +2993,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 
        free_space = btrfs_super_total_bytes(disk_super);
        /*
-        * we allow the metadata to grow to a max of either 5gb or 5% of the
+        * we allow the metadata to grow to a max of either 10gb or 5% of the
         * space in the volume.
         */
-       min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+       min_metadata = min((u64)10 * 1024 * 1024 * 1024,
                             div64_u64(free_space * 5, 100));
        if (info->total_bytes >= min_metadata) {
                spin_unlock(&info->lock);
@@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
        else
                old_val -= num_bytes;
        btrfs_set_super_bytes_used(&info->super_copy, old_val);
-
-       /* block accounting for root item */
-       old_val = btrfs_root_used(&root->root_item);
-       if (alloc)
-               old_val += num_bytes;
-       else
-               old_val -= num_bytes;
-       btrfs_set_root_used(&root->root_item, old_val);
        spin_unlock(&info->delalloc_lock);
 
        while (total) {
@@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         u64 bytenr, u32 blocksize,
+                         u64 parent, u64 root_objectid, int level)
+{
+       u64 used;
+       spin_lock(&root->node_lock);
+       used = btrfs_root_used(&root->root_item) - blocksize;
+       btrfs_set_root_used(&root->root_item, used);
+       spin_unlock(&root->node_lock);
+
+       return btrfs_free_extent(trans, root, bytenr, blocksize,
+                                parent, root_objectid, level, 0);
+}
+
 static u64 stripe_align(struct btrfs_root *root, u64 val)
 {
        u64 mask = ((u64)root->stripesize - 1);
@@ -4102,7 +4125,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 
 enum btrfs_loop_type {
-       LOOP_CACHED_ONLY = 0,
+       LOOP_FIND_IDEAL = 0,
        LOOP_CACHING_NOWAIT = 1,
        LOOP_CACHING_WAIT = 2,
        LOOP_ALLOC_CHUNK = 3,
@@ -4131,12 +4154,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *block_group = NULL;
        int empty_cluster = 2 * 1024 * 1024;
        int allowed_chunk_alloc = 0;
+       int done_chunk_alloc = 0;
        struct btrfs_space_info *space_info;
        int last_ptr_loop = 0;
        int loop = 0;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
+       u64 ideal_cache_percent = 0;
+       u64 ideal_cache_offset = 0;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4144,6 +4170,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        ins->offset = 0;
 
        space_info = __find_space_info(root->fs_info, data);
+       if (!space_info) {
+               printk(KERN_ERR "No space info for %d\n", data);
+               return -ENOSPC;
+       }
 
        if (orig_root->ref_cows || empty_size)
                allowed_chunk_alloc = 1;
@@ -4172,14 +4202,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                empty_cluster = 0;
 
        if (search_start == hint_byte) {
+ideal_cache:
                block_group = btrfs_lookup_block_group(root->fs_info,
                                                       search_start);
                /*
                 * we don't want to use the block group if it doesn't match our
                 * allocation bits, or if its not cached.
+                *
+                * However if we are re-searching with an ideal block group
+                * picked out then we don't care that the block group is cached.
                 */
                if (block_group && block_group_bits(block_group, data) &&
-                   block_group_cache_done(block_group)) {
+                   (block_group->cached != BTRFS_CACHE_NO ||
+                    search_start == ideal_cache_offset)) {
                        down_read(&space_info->groups_sem);
                        if (list_empty(&block_group->list) ||
                            block_group->ro) {
@@ -4191,46 +4226,63 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                 */
                                btrfs_put_block_group(block_group);
                                up_read(&space_info->groups_sem);
-                       } else
+                       } else {
                                goto have_block_group;
+                       }
                } else if (block_group) {
                        btrfs_put_block_group(block_group);
                }
        }
-
 search:
        down_read(&space_info->groups_sem);
        list_for_each_entry(block_group, &space_info->block_groups, list) {
                u64 offset;
                int cached;
 
-               atomic_inc(&block_group->count);
+               btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
 have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                       u64 free_percent;
+
+                       free_percent = btrfs_block_group_used(&block_group->item);
+                       free_percent *= 100;
+                       free_percent = div64_u64(free_percent,
+                                                block_group->key.offset);
+                       free_percent = 100 - free_percent;
+                       if (free_percent > ideal_cache_percent &&
+                           likely(!block_group->ro)) {
+                               ideal_cache_offset = block_group->key.objectid;
+                               ideal_cache_percent = free_percent;
+                       }
+
                        /*
-                        * we want to start caching kthreads, but not too many
-                        * right off the bat so we don't overwhelm the system,
-                        * so only start them if there are less than 2 and we're
-                        * in the initial allocation phase.
+                        * We only want to start kthread caching if we are at
+                        * the point where we will wait for caching to make
+                        * progress, or if our ideal search is over and we've
+                        * found somebody to start caching.
                         */
                        if (loop > LOOP_CACHING_NOWAIT ||
-                           atomic_read(&space_info->caching_threads) < 2) {
+                           (loop > LOOP_FIND_IDEAL &&
+                            atomic_read(&space_info->caching_threads) < 2)) {
                                ret = cache_block_group(block_group);
                                BUG_ON(ret);
                        }
-               }
-
-               cached = block_group_cache_done(block_group);
-               if (unlikely(!cached)) {
                        found_uncached_bg = true;
 
-                       /* if we only want cached bgs, loop */
-                       if (loop == LOOP_CACHED_ONLY)
+                       /*
+                        * If loop is set for cached only, try the next block
+                        * group.
+                        */
+                       if (loop == LOOP_FIND_IDEAL)
                                goto loop;
                }
 
+               cached = block_group_cache_done(block_group);
+               if (unlikely(!cached))
+                       found_uncached_bg = true;
+
                if (unlikely(block_group->ro))
                        goto loop;
 
@@ -4275,7 +4327,7 @@ have_block_group:
 
                                btrfs_put_block_group(block_group);
                                block_group = last_ptr->block_group;
-                               atomic_inc(&block_group->count);
+                               btrfs_get_block_group(block_group);
                                spin_unlock(&last_ptr->lock);
                                spin_unlock(&last_ptr->refill_lock);
 
@@ -4410,9 +4462,11 @@ loop:
        }
        up_read(&space_info->groups_sem);
 
-       /* LOOP_CACHED_ONLY, only search fully cached block groups
-        * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-        *                      dont wait foR them to finish caching
+       /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+        *                      for them to make caching progress.  Also
+        *                      determine the best possible bg to cache
+        * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+        *                      caching kthreads as we move along
         * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
         * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
         * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4421,12 +4475,47 @@ loop:
        if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
            (found_uncached_bg || empty_size || empty_cluster ||
             allowed_chunk_alloc)) {
-               if (found_uncached_bg) {
+               if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
-                       if (loop < LOOP_CACHING_WAIT) {
-                               loop++;
+                       loop++;
+                       if (!ideal_cache_percent &&
+                           atomic_read(&space_info->caching_threads))
                                goto search;
-                       }
+
+                       /*
+                        * 1 of the following 2 things have happened so far
+                        *
+                        * 1) We found an ideal block group for caching that
+                        * is mostly full and will cache quickly, so we might
+                        * as well wait for it.
+                        *
+                        * 2) We searched for cached only and we didn't find
+                        * anything, and we didn't start any caching kthreads
+                        * either, so chances are we will loop through and
+                        * start a couple caching kthreads, and then come back
+                        * around and just wait for them.  This will be slower
+                        * because we will have 2 caching kthreads reading at
+                        * the same time when we could have just started one
+                        * and waited for it to get far enough to give us an
+                        * allocation, so go ahead and go to the wait caching
+                        * loop.
+                        */
+                       loop = LOOP_CACHING_WAIT;
+                       search_start = ideal_cache_offset;
+                       ideal_cache_percent = 0;
+                       goto ideal_cache;
+               } else if (loop == LOOP_FIND_IDEAL) {
+                       /*
+                        * Didn't find a uncached bg, wait on anything we find
+                        * next.
+                        */
+                       loop = LOOP_CACHING_WAIT;
+                       goto search;
+               }
+
+               if (loop < LOOP_CACHING_WAIT) {
+                       loop++;
+                       goto search;
                }
 
                if (loop == LOOP_ALLOC_CHUNK) {
@@ -4438,7 +4527,8 @@ loop:
                        ret = do_chunk_alloc(trans, root, num_bytes +
                                             2 * 1024 * 1024, data, 1);
                        allowed_chunk_alloc = 0;
-               } else {
+                       done_chunk_alloc = 1;
+               } else if (!done_chunk_alloc) {
                        space_info->force_alloc = 1;
                }
 
@@ -4515,7 +4605,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 {
        int ret;
        u64 search_start = 0;
-       struct btrfs_fs_info *info = root->fs_info;
 
        data = btrfs_get_alloc_profile(root, data);
 again:
@@ -4523,17 +4612,9 @@ again:
         * the only place that sets empty_size is btrfs_realloc_node, which
         * is not called recursively on allocations
         */
-       if (empty_size || root->ref_cows) {
-               if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
-                       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                    2 * 1024 * 1024,
-                                    BTRFS_BLOCK_GROUP_METADATA |
-                                    (info->metadata_alloc_profile &
-                                     info->avail_metadata_alloc_bits), 0);
-               }
+       if (empty_size || root->ref_cows)
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                     num_bytes + 2 * 1024 * 1024, data, 0);
-       }
 
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4834,6 +4915,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
                                        extent_op);
                BUG_ON(ret);
        }
+
+       if (root_objectid == root->root_key.objectid) {
+               u64 used;
+               spin_lock(&root->node_lock);
+               used = btrfs_root_used(&root->root_item) + num_bytes;
+               btrfs_set_root_used(&root->root_item, used);
+               spin_unlock(&root->node_lock);
+       }
        return ret;
 }
 
@@ -4856,8 +4945,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        btrfs_set_buffer_uptodate(buf);
 
        if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
-               set_extent_dirty(&root->dirty_log_pages, buf->start,
-                        buf->start + buf->len - 1, GFP_NOFS);
+               /*
+                * we allow two log transactions at a time, use different
+                * EXENT bit to differentiate dirty pages.
+                */
+               if (root->log_transid % 2 == 0)
+                       set_extent_dirty(&root->dirty_log_pages, buf->start,
+                                       buf->start + buf->len - 1, GFP_NOFS);
+               else
+                       set_extent_new(&root->dirty_log_pages, buf->start,
+                                       buf->start + buf->len - 1, GFP_NOFS);
        } else {
                set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
@@ -5112,6 +5209,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        next = btrfs_find_tree_block(root, bytenr, blocksize);
        if (!next) {
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+               if (!next)
+                       return -ENOMEM;
                reada = 1;
        }
        btrfs_tree_lock(next);
@@ -5309,10 +5408,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
        int ret;
 
        while (level >= 0) {
-               if (path->slots[level] >=
-                   btrfs_header_nritems(path->nodes[level]))
-                       break;
-
                ret = walk_down_proc(trans, root, path, wc, lookup_info);
                if (ret > 0)
                        break;
@@ -5320,11 +5415,16 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                if (level == 0)
                        break;
 
+               if (path->slots[level] >=
+                   btrfs_header_nritems(path->nodes[level]))
+                       break;
+
                ret = do_walk_down(trans, root, path, wc, &lookup_info);
                if (ret > 0) {
                        path->slots[level]++;
                        continue;
-               }
+               } else if (ret < 0)
+                       return ret;
                level = wc->level;
        }
        return 0;
@@ -6468,6 +6568,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
        struct btrfs_key key;
        struct inode *inode = NULL;
        struct btrfs_file_extent_item *fi;
+       struct extent_state *cached_state = NULL;
        u64 num_bytes;
        u64 skip_objectid = 0;
        u32 nritems;
@@ -6496,12 +6597,14 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
                }
                num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 
-               lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
-                           key.offset + num_bytes - 1, GFP_NOFS);
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
+                                key.offset + num_bytes - 1, 0, &cached_state,
+                                GFP_NOFS);
                btrfs_drop_extent_cache(inode, key.offset,
                                        key.offset + num_bytes - 1, 1);
-               unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
-                             key.offset + num_bytes - 1, GFP_NOFS);
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
+                                    key.offset + num_bytes - 1, &cached_state,
+                                    GFP_NOFS);
                cond_resched();
        }
        iput(inode);
@@ -7273,7 +7376,6 @@ static int find_first_block_group(struct btrfs_root *root,
                }
                path->slots[0]++;
        }
-       ret = -ENOENT;
 out:
        return ret;
 }
@@ -7310,9 +7412,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                        wait_block_group_cache_done(block_group);
 
                btrfs_remove_free_space_cache(block_group);
-
-               WARN_ON(atomic_read(&block_group->count) != 1);
-               kfree(block_group);
+               btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
        }
This page took 0.03144 seconds and 5 git commands to generate.