Btrfs: add fragment=* debug mount option
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 522fb45d472ae9673cdbcd8c1f3ffccafa4a476f..0e32abf53b5bf2a7e6482271d8e9c00115d19ec6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
                kfree(ctl);
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static void fragment_free_space(struct btrfs_root *root,
+                               struct btrfs_block_group_cache *block_group)
+{
+       u64 start = block_group->key.objectid;
+       u64 len = block_group->key.offset;
+       u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
+               root->nodesize : root->sectorsize;
+       u64 step = chunk << 1;
+
+       while (len > chunk) {
+               btrfs_remove_free_space(block_group, start, chunk);
+               start += step;
+               if (len < step)
+                       len = 0;
+               else
+                       len -= step;
+       }
+}
+#endif
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
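For reference, below is a minimal user-space sketch (made-up sizes, not part of the patch) of the pattern the loop above produces: every other chunk-sized range is deleted from the free-space cache, so what stays allocatable is a series of isolated, chunk-sized extents.

/*
 * Stand-alone illustration of the removal pattern of fragment_free_space().
 * The sizes are made-up example values; in the patch, "chunk" is nodesize
 * for metadata block groups and sectorsize for data block groups.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0;		/* hypothetical block group start  */
	uint64_t len   = 64 * 1024;	/* hypothetical block group length */
	uint64_t chunk = 16 * 1024;	/* hypothetical nodesize           */
	uint64_t step  = chunk << 1;

	/* Same walk as the loop above: drop one chunk, keep one, repeat. */
	while (len > chunk) {
		printf("remove free space [%llu, %llu)\n",
		       (unsigned long long)start,
		       (unsigned long long)(start + chunk));
		start += step;
		len = len < step ? 0 : len - step;
	}
	return 0;
}

With these numbers the sketch removes [0, 16K) and [32K, 48K), leaving two disjoint 16 KiB free extents, which is exactly the worst-case fragmentation the debug option is meant to create.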
@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
        u64 last = 0;
        u32 nritems;
        int ret = -ENOMEM;
+       bool wakeup = true;
 
        caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       /*
+        * If we're fragmenting we don't want to make anybody think we can
+        * allocate from this block group until we've had a chance to fragment
+        * the free space.
+        */
+       if (btrfs_should_fragment_free_space(extent_root, block_group))
+               wakeup = false;
+#endif
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
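Why the wakeup gating matters: allocators block on caching_ctl->wait and treat caching_ctl->progress as "everything below this offset has been cached and may be allocated from". A purely hypothetical waiter is sketched below (illustrative only; the real helpers in this file differ in detail). If the caching thread published progress or issued wake-ups before fragment_free_space() had run, such a caller could allocate from ranges that are about to be removed from the free-space cache.

/* Hypothetical sketch, not code from the patch: a consumer waiting for the
 * cached offset to move past the range it wants to allocate from. */
static void example_wait_for_range(struct btrfs_block_group_cache *cache,
				   u64 offset)
{
	struct btrfs_caching_control *ctl = cache->caching_ctl;

	wait_event(ctl->wait,
		   cache->cached == BTRFS_CACHE_FINISHED ||
		   ctl->progress > offset);
}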
@@ -441,7 +472,8 @@ next:
 
                        if (need_resched() ||
                            rwsem_is_contended(&fs_info->commit_root_sem)) {
-                               caching_ctl->progress = last;
+                               if (wakeup)
+                                       caching_ctl->progress = last;
                                btrfs_release_path(path);
                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
@@ -464,7 +496,8 @@ next:
                        key.offset = 0;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
 
-                       caching_ctl->progress = last;
+                       if (wakeup)
+                               caching_ctl->progress = last;
                        btrfs_release_path(path);
                        goto next;
                }
@@ -491,7 +524,8 @@ next:
 
                        if (total_found > (1024 * 1024 * 2)) {
                                total_found = 0;
-                               wake_up(&caching_ctl->wait);
+                               if (wakeup)
+                                       wake_up(&caching_ctl->wait);
                        }
                }
                path->slots[0]++;
@@ -501,13 +535,27 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
-       caching_ctl->progress = (u64)-1;
-
        spin_lock(&block_group->lock);
        block_group->caching_ctl = NULL;
        block_group->cached = BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(extent_root, block_group)) {
+               u64 bytes_used;
+
+               spin_lock(&block_group->space_info->lock);
+               spin_lock(&block_group->lock);
+               bytes_used = block_group->key.offset -
+                       btrfs_block_group_used(&block_group->item);
+               block_group->space_info->bytes_used += bytes_used >> 1;
+               spin_unlock(&block_group->lock);
+               spin_unlock(&block_group->space_info->lock);
+               fragment_free_space(extent_root, block_group);
+       }
+#endif
+
+       caching_ctl->progress = (u64)-1;
 err:
        btrfs_free_path(path);
        up_read(&fs_info->commit_root_sem);
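The accounting just above keeps the space_info counters honest: fragment_free_space() will drop roughly half of the group's free space from the free-space cache, so half of the currently free bytes are charged to space_info->bytes_used up front. A worked example with made-up numbers, as a stand-alone sketch rather than kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Made-up example: a 1 GiB block group with 256 MiB already used. */
	uint64_t key_offset = 1024ULL * 1024 * 1024;	/* block_group->key.offset  */
	uint64_t group_used = 256ULL * 1024 * 1024;	/* btrfs_block_group_used() */

	/* Same arithmetic as the patch (which names this value "bytes_used"):
	 * the bytes that are still free ... */
	uint64_t bytes_free = key_offset - group_used;

	/* ... of which every other chunk is about to be removed, i.e. ~half. */
	uint64_t charged = bytes_free >> 1;

	printf("free before fragmenting:           %llu MiB\n",
	       (unsigned long long)(bytes_free >> 20));
	printf("charged to space_info->bytes_used: %llu MiB\n",
	       (unsigned long long)(charged >> 20));
	return 0;
}

The same halving shows up again below, in cache_block_group() for the ret == 1 fast path and, as size - bytes_used, in btrfs_make_block_group().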
@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                        }
                }
                spin_unlock(&cache->lock);
+#ifdef CONFIG_BTRFS_DEBUG
+               if (ret == 1 &&
+                   btrfs_should_fragment_free_space(fs_info->extent_root,
+                                                    cache)) {
+                       u64 bytes_used;
+
+                       spin_lock(&cache->space_info->lock);
+                       spin_lock(&cache->lock);
+                       bytes_used = cache->key.offset -
+                               btrfs_block_group_used(&cache->item);
+                       cache->space_info->bytes_used += bytes_used >> 1;
+                       spin_unlock(&cache->lock);
+                       spin_unlock(&cache->space_info->lock);
+                       fragment_free_space(fs_info->extent_root, cache);
+               }
+#endif
                mutex_unlock(&caching_ctl->mutex);
 
                wake_up(&caching_ctl->wait);
@@ -3822,7 +3886,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
        u64 num_devices = root->fs_info->fs_devices->rw_devices;
        u64 target;
-       u64 tmp;
+       u64 raid_type;
+       u64 allowed = 0;
 
        /*
         * see if restripe for this chunk_type is in progress, if so
@@ -3840,31 +3905,26 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        spin_unlock(&root->fs_info->balance_lock);
 
        /* First, mask out the RAID levels which aren't possible */
-       if (num_devices == 1)
-               flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
-                          BTRFS_BLOCK_GROUP_RAID5);
-       if (num_devices < 3)
-               flags &= ~BTRFS_BLOCK_GROUP_RAID6;
-       if (num_devices < 4)
-               flags &= ~BTRFS_BLOCK_GROUP_RAID10;
-
-       tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
-                      BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
-                      BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
-       flags &= ~tmp;
-
-       if (tmp & BTRFS_BLOCK_GROUP_RAID6)
-               tmp = BTRFS_BLOCK_GROUP_RAID6;
-       else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
-               tmp = BTRFS_BLOCK_GROUP_RAID5;
-       else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
-               tmp = BTRFS_BLOCK_GROUP_RAID10;
-       else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
-               tmp = BTRFS_BLOCK_GROUP_RAID1;
-       else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
-               tmp = BTRFS_BLOCK_GROUP_RAID0;
-
-       return extended_to_chunk(flags | tmp);
+       for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
+               if (num_devices >= btrfs_raid_array[raid_type].devs_min)
+                       allowed |= btrfs_raid_group[raid_type];
+       }
+       allowed &= flags;
+
+       if (allowed & BTRFS_BLOCK_GROUP_RAID6)
+               allowed = BTRFS_BLOCK_GROUP_RAID6;
+       else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
+               allowed = BTRFS_BLOCK_GROUP_RAID5;
+       else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
+               allowed = BTRFS_BLOCK_GROUP_RAID10;
+       else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
+               allowed = BTRFS_BLOCK_GROUP_RAID1;
+       else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
+               allowed = BTRFS_BLOCK_GROUP_RAID0;
+
+       flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+       return extended_to_chunk(flags | allowed);
 }
 
 static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
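The rewrite above drops the open-coded num_devices checks in favour of the per-profile minimum device counts carried in btrfs_raid_array[].devs_min. A rough stand-alone sketch of the masking step follows; the minimum device counts are written out by hand here as assumptions rather than read from the kernel table.

#include <stdio.h>
#include <stdint.h>

/* Profile bits with made-up values for the sketch; the kernel defines the
 * real BTRFS_BLOCK_GROUP_* bits elsewhere. */
enum {
	EX_RAID10 = 1 << 0,
	EX_RAID1  = 1 << 1,
	EX_DUP    = 1 << 2,
	EX_RAID0  = 1 << 3,
	EX_SINGLE = 1 << 4,
	EX_RAID5  = 1 << 5,
	EX_RAID6  = 1 << 6,
};

struct ex_raid_attr {
	uint64_t bit;
	uint64_t devs_min;	/* plays the role of btrfs_raid_array[].devs_min */
};

/* Assumed minimums: RAID10 needs 4 devices, RAID6 needs 3,
 * RAID1/RAID0/RAID5 need 2, DUP and SINGLE need 1. */
static const struct ex_raid_attr ex_raid_array[] = {
	{ EX_RAID10, 4 }, { EX_RAID1, 2 }, { EX_DUP, 1 }, { EX_RAID0, 2 },
	{ EX_SINGLE, 1 }, { EX_RAID5, 2 }, { EX_RAID6, 3 },
};

int main(void)
{
	uint64_t flags = EX_RAID6 | EX_RAID10 | EX_RAID1;	/* requested profiles */
	uint64_t num_devices = 2;				/* rw devices present */
	uint64_t allowed = 0;
	size_t i;

	/* Same idea as the loop in the patch: a profile survives only if
	 * enough devices are present to build it. */
	for (i = 0; i < sizeof(ex_raid_array) / sizeof(ex_raid_array[0]); i++)
		if (num_devices >= ex_raid_array[i].devs_min)
			allowed |= ex_raid_array[i].bit;
	allowed &= flags;

	printf("allowed mask: 0x%llx (RAID1 only with 2 devices)\n",
	       (unsigned long long)allowed);
	return 0;
}

After the mask, the patch keeps only the strongest surviving profile (RAID6 over RAID5 over RAID10 over RAID1 over RAID0), which is the same preference order the removed if/else chain on tmp implemented.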
@@ -4891,13 +4951,9 @@ static struct btrfs_block_rsv *get_block_rsv(
 {
        struct btrfs_block_rsv *block_rsv = NULL;
 
-       if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-               block_rsv = trans->block_rsv;
-
-       if (root == root->fs_info->csum_root && trans->adding_csums)
-               block_rsv = trans->block_rsv;
-
-       if (root == root->fs_info->uuid_root)
+       if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+           (root == root->fs_info->csum_root && trans->adding_csums) ||
+            (root == root->fs_info->uuid_root))
                block_rsv = trans->block_rsv;
 
        if (!block_rsv)
@@ -9632,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        free_excluded_extents(root, cache);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(root, cache)) {
+               u64 new_bytes_used = size - bytes_used;
+
+               bytes_used += new_bytes_used >> 1;
+               fragment_free_space(root, cache);
+       }
+#endif
        /*
         * Call to ensure the corresponding space_info object is created and
         * assigned to our block group, but don't update its counters just yet.