Btrfs: Deal with failed writes in mirrored configurations
[deliverable/linux.git] / fs / btrfs / extent-tree.c
index 14eb8fc87015d7d25099ec1622fa648cf6d74162..f94794a993291aff636177cf4bd7a7f46913347a 100644 (file)
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 021110-1307, USA.
  */
-
 #include <linux/sched.h>
-#include <linux/crc32c.h>
 #include <linux/pagemap.h>
+#include <linux/writeback.h>
 #include "hash.h"
+#include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -36,10 +36,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 bytes_used,
-                          u64 type, u64 chunk_tree, u64 chunk_objectid,
-                          u64 size);
 
 
 static int cache_block_group(struct btrfs_root *root,
@@ -172,7 +168,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
 
/*
 * Return non-zero when the block group's flags contain ALL of the
 * requested bits.
 *
 * A plain (cache->flags & bits) test matches on ANY overlapping bit,
 * which would wrongly treat e.g. a DATA|RAID1 group as acceptable for
 * a METADATA|RAID1 request; comparing against @bits requires an exact
 * superset.
 */
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}
 
 static int noinline find_search_start(struct btrfs_root *root,
@@ -192,6 +188,7 @@ static int noinline find_search_start(struct btrfs_root *root,
 
        if (!cache)
                goto out;
+
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        free_space_cache = &root->fs_info->free_space_cache;
 
@@ -201,7 +198,7 @@ again:
                goto out;
 
        last = max(search_start, cache->key.objectid);
-       if (!block_group_bits(cache, data)) {
+       if (!block_group_bits(cache, data) || cache->ro) {
                goto new_group;
        }
 
@@ -226,13 +223,19 @@ again:
                        continue;
                }
                spin_unlock_irq(&free_space_cache->lock);
+               if (cache->ro)
+                       goto new_group;
                if (start + num > cache->key.objectid + cache->key.offset)
                        goto new_group;
                if (start + num  > total_fs_bytes)
                        goto new_group;
+               if (!block_group_bits(cache, data)) {
+                       printk("block group bits don't match %Lu %d\n", cache->flags, data);
+               }
                *start_ret = start;
                return 0;
-       } out:
+       }
+out:
        cache = btrfs_lookup_block_group(root->fs_info, search_start);
        if (!cache) {
                printk("Unable to find block group for %Lu\n", search_start);
@@ -307,20 +310,20 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
        int bit;
        int ret;
        int full_search = 0;
-       int factor = 8;
+       int factor = 10;
 
        block_group_cache = &info->block_group_cache;
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 
-       if (!owner)
-               factor = 8;
+       if (data & BTRFS_BLOCK_GROUP_METADATA)
+               factor = 9;
 
        bit = block_group_state_bits(data);
 
        if (search_start && search_start < total_fs_bytes) {
                struct btrfs_block_group_cache *shint;
                shint = btrfs_lookup_block_group(info, search_start);
-               if (shint && block_group_bits(shint, data)) {
+               if (shint && block_group_bits(shint, data) && !shint->ro) {
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned <
                            div_factor(shint->key.offset, factor)) {
@@ -328,7 +331,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                        }
                }
        }
-       if (hint && block_group_bits(hint, data) &&
+       if (hint && !hint->ro && block_group_bits(hint, data) &&
            hint->key.objectid < total_fs_bytes) {
                used = btrfs_block_group_used(&hint->item);
                if (used + hint->pinned <
@@ -365,14 +368,17 @@ again:
                if (cache->key.objectid > total_fs_bytes)
                        break;
 
-               if (full_search)
-                       free_check = cache->key.offset;
-               else
-                       free_check = div_factor(cache->key.offset, factor);
+               if (!cache->ro && block_group_bits(cache, data)) {
+                       if (full_search)
+                               free_check = cache->key.offset;
+                       else
+                               free_check = div_factor(cache->key.offset,
+                                                       factor);
 
-               if (used + cache->pinned < free_check) {
-                       found_group = cache;
-                       goto found;
+                       if (used + cache->pinned < free_check) {
+                               found_group = cache;
+                               goto found;
+                       }
                }
                cond_resched();
        }
@@ -391,16 +397,15 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
        u32 high_crc = ~(u32)0;
        u32 low_crc = ~(u32)0;
        __le64 lenum;
-
        lenum = cpu_to_le64(root_objectid);
-       high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+       high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(ref_generation);
-       low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+       low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
        if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
                lenum = cpu_to_le64(owner);
-               low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+               low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
                lenum = cpu_to_le64(owner_offset);
-               low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+               low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
        }
        return ((u64)high_crc << 32) | (u64)low_crc;
 }
@@ -639,7 +644,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       path->reada = 0;
+       path->reada = 1;
        key.objectid = bytenr;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        key.offset = num_bytes;
@@ -659,7 +664,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
        btrfs_release_path(root->fs_info->extent_root, path);
 
-       path->reada = 0;
+       path->reada = 1;
        ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
                                          path, bytenr, root_objectid,
                                          ref_generation, owner, owner_offset);
@@ -691,7 +696,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans,
 
        WARN_ON(num_bytes < root->sectorsize);
        path = btrfs_alloc_path();
-       path->reada = 0;
+       path->reada = 1;
        key.objectid = bytenr;
        key.offset = num_bytes;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -975,7 +980,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                ret = get_state_private(block_group_cache, start, &ptr);
                if (ret)
                        break;
-
                cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
                err = write_one_cache_group(trans, root,
                                            path, cache);
@@ -1010,6 +1014,71 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 }
 
+static int update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            struct btrfs_space_info **space_info)
+{
+       struct btrfs_space_info *found;
+
+       found = __find_space_info(info, flags);
+       if (found) {
+               found->total_bytes += total_bytes;
+               found->bytes_used += bytes_used;
+               found->full = 0;
+               WARN_ON(found->total_bytes < found->bytes_used);
+               *space_info = found;
+               return 0;
+       }
+       found = kmalloc(sizeof(*found), GFP_NOFS);
+       if (!found)
+               return -ENOMEM;
+
+       list_add(&found->list, &info->space_info);
+       found->flags = flags;
+       found->total_bytes = total_bytes;
+       found->bytes_used = bytes_used;
+       found->bytes_pinned = 0;
+       found->full = 0;
+       *space_info = found;
+       return 0;
+}
+
+static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+       u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
+                                  BTRFS_BLOCK_GROUP_RAID1 |
+                                  BTRFS_BLOCK_GROUP_RAID10 |
+                                  BTRFS_BLOCK_GROUP_DUP);
+       if (extra_flags) {
+               if (flags & BTRFS_BLOCK_GROUP_DATA)
+                       fs_info->avail_data_alloc_bits |= extra_flags;
+               if (flags & BTRFS_BLOCK_GROUP_METADATA)
+                       fs_info->avail_metadata_alloc_bits |= extra_flags;
+               if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+                       fs_info->avail_system_alloc_bits |= extra_flags;
+       }
+}
+
+static u64 reduce_alloc_profile(u64 flags)
+{
+       if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
+           (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+                     BTRFS_BLOCK_GROUP_RAID10)))
+               flags &= ~BTRFS_BLOCK_GROUP_DUP;
+
+       if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
+           (flags & BTRFS_BLOCK_GROUP_RAID10))
+               flags &= ~BTRFS_BLOCK_GROUP_RAID1;
+
+       if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
+           ((flags & BTRFS_BLOCK_GROUP_RAID1) |
+            (flags & BTRFS_BLOCK_GROUP_RAID10) |
+            (flags & BTRFS_BLOCK_GROUP_DUP)))
+               flags &= ~BTRFS_BLOCK_GROUP_RAID0;
+       return flags;
+}
+
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags)
@@ -1020,13 +1089,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        u64 num_bytes;
        int ret;
 
+       flags = reduce_alloc_profile(flags);
+
        space_info = __find_space_info(extent_root->fs_info, flags);
+       if (!space_info) {
+               ret = update_space_info(extent_root->fs_info, flags,
+                                       0, 0, &space_info);
+               BUG_ON(ret);
+       }
        BUG_ON(!space_info);
 
        if (space_info->full)
                return 0;
 
-       thresh = div_factor(space_info->total_bytes, 7);
+       thresh = div_factor(space_info->total_bytes, 6);
        if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
            thresh)
                return 0;
@@ -1041,9 +1117,9 @@ printk("space info full %Lu\n", flags);
        BUG_ON(ret);
 
        ret = btrfs_make_block_group(trans, extent_root, 0, flags,
-                    extent_root->fs_info->chunk_root->root_key.objectid,
-                    start, num_bytes);
+                    BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
        BUG_ON(ret);
+
        return 0;
 }
 
@@ -1231,7 +1307,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
                                    root->fs_info->running_transaction->transid;
                                u64 header_transid =
                                        btrfs_header_generation(buf);
-                               if (header_transid == transid) {
+                               if (header_transid == transid &&
+                                   !btrfs_header_flag(buf,
+                                              BTRFS_HEADER_FLAG_WRITTEN)) {
                                        clean_tree_block(NULL, root, buf);
                                        free_extent_buffer(buf);
                                        return 1;
@@ -1277,7 +1355,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
        if (!path)
                return -ENOMEM;
 
-       path->reada = 0;
+       path->reada = 1;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, root_objectid,
                                    ref_generation,
@@ -1473,13 +1551,32 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_root * root = orig_root->fs_info->extent_root;
        struct btrfs_fs_info *info = root->fs_info;
        u64 total_needed = num_bytes;
+       u64 *last_ptr = NULL;
        struct btrfs_block_group_cache *block_group;
        int full_scan = 0;
        int wrapped = 0;
+       int empty_cluster = 2 * 1024 * 1024;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
 
+       if (data & BTRFS_BLOCK_GROUP_METADATA) {
+               last_ptr = &root->fs_info->last_alloc;
+               empty_cluster = 256 * 1024;
+       }
+
+       if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+               last_ptr = &root->fs_info->last_data_alloc;
+       }
+
+       if (last_ptr) {
+               if (*last_ptr)
+                       hint_byte = *last_ptr;
+               else {
+                       empty_size += empty_cluster;
+               }
+       }
+
        if (search_end == (u64)-1)
                search_end = btrfs_super_total_bytes(&info->super_copy);
 
@@ -1489,11 +1586,14 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                        hint_byte = search_start;
                block_group = btrfs_find_block_group(root, block_group,
                                                     hint_byte, data, 1);
+               if (last_ptr && *last_ptr == 0 && block_group)
+                       hint_byte = block_group->key.objectid;
        } else {
                block_group = btrfs_find_block_group(root,
                                                     trans->block_group,
                                                     search_start, data, 1);
        }
+       search_start = max(search_start, hint_byte);
 
        total_needed += empty_size;
 
@@ -1506,9 +1606,36 @@ check_failed:
        }
        ret = find_search_start(root, &block_group, &search_start,
                                total_needed, data);
+       if (ret == -ENOSPC && last_ptr && *last_ptr) {
+               *last_ptr = 0;
+               block_group = btrfs_lookup_block_group(info,
+                                                      orig_search_start);
+               search_start = orig_search_start;
+               ret = find_search_start(root, &block_group, &search_start,
+                                       total_needed, data);
+       }
+       if (ret == -ENOSPC)
+               goto enospc;
        if (ret)
                goto error;
 
+       if (last_ptr && *last_ptr && search_start != *last_ptr) {
+               *last_ptr = 0;
+               if (!empty_size) {
+                       empty_size += empty_cluster;
+                       total_needed += empty_size;
+               }
+               block_group = btrfs_lookup_block_group(info,
+                                                      orig_search_start);
+               search_start = orig_search_start;
+               ret = find_search_start(root, &block_group,
+                                       &search_start, total_needed, data);
+               if (ret == -ENOSPC)
+                       goto enospc;
+               if (ret)
+                       goto error;
+       }
+
        search_start = stripe_align(root, search_start);
        ins->objectid = search_start;
        ins->offset = num_bytes;
@@ -1547,6 +1674,13 @@ check_failed:
                        trans->block_group = block_group;
        }
        ins->offset = num_bytes;
+       if (last_ptr) {
+               *last_ptr = ins->objectid + ins->offset;
+               if (*last_ptr ==
+                   btrfs_super_total_bytes(&root->fs_info->super_copy)) {
+                       *last_ptr = 0;
+               }
+       }
        return 0;
 
 new_group:
@@ -1573,6 +1707,7 @@ enospc:
 error:
        return ret;
 }
+
 /*
  * finds a free extent and does all the dirty work required for allocation
  * returns the key for the extent through ins, and a tree buffer for
@@ -1582,17 +1717,18 @@ error:
  */
 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root,
-                      u64 num_bytes, u64 root_objectid, u64 ref_generation,
+                      u64 num_bytes, u64 min_alloc_size,
+                      u64 root_objectid, u64 ref_generation,
                       u64 owner, u64 owner_offset,
                       u64 empty_size, u64 hint_byte,
-                      u64 search_end, struct btrfs_key *ins, int data)
+                      u64 search_end, struct btrfs_key *ins, u64 data)
 {
        int ret;
        int pending_ret;
        u64 super_used;
        u64 root_used;
        u64 search_start = 0;
-       u64 new_hint;
+       u64 alloc_profile;
        u32 sizes[2];
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_root *extent_root = info->extent_root;
@@ -1602,34 +1738,48 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
        struct btrfs_key keys[2];
 
        if (data) {
-               data = BTRFS_BLOCK_GROUP_DATA;
+               alloc_profile = info->avail_data_alloc_bits &
+                               info->data_alloc_profile;
+               data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
        } else if (root == root->fs_info->chunk_root) {
-               data = BTRFS_BLOCK_GROUP_SYSTEM;
+               alloc_profile = info->avail_system_alloc_bits &
+                               info->system_alloc_profile;
+               data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
        } else {
-               data = BTRFS_BLOCK_GROUP_METADATA;
+               alloc_profile = info->avail_metadata_alloc_bits &
+                               info->metadata_alloc_profile;
+               data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
        }
-
+again:
+       data = reduce_alloc_profile(data);
        if (root->ref_cows) {
-               if (data != BTRFS_BLOCK_GROUP_METADATA) {
+               if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                            num_bytes,
-                                            BTRFS_BLOCK_GROUP_METADATA);
+                                            2 * 1024 * 1024,
+                                            BTRFS_BLOCK_GROUP_METADATA |
+                                            (info->metadata_alloc_profile &
+                                             info->avail_metadata_alloc_bits));
                        BUG_ON(ret);
                }
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                    num_bytes, data);
+                                    num_bytes + 2 * 1024 * 1024, data);
                BUG_ON(ret);
        }
 
-       new_hint = max(hint_byte, root->fs_info->alloc_start);
-       if (new_hint < btrfs_super_total_bytes(&info->super_copy))
-               hint_byte = new_hint;
-
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
                               search_start, search_end, hint_byte, ins,
                               trans->alloc_exclude_start,
                               trans->alloc_exclude_nr, data);
+
+       if (ret == -ENOSPC && num_bytes > min_alloc_size) {
+               num_bytes = num_bytes >> 1;
+               num_bytes = max(num_bytes, min_alloc_size);
+               goto again;
+       }
+       if (ret) {
+               printk("allocation failed flags %Lu\n", data);
+       }
        BUG_ON(ret);
        if (ret)
                return ret;
@@ -1748,7 +1898,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        int ret;
        struct extent_buffer *buf;
 
-       ret = btrfs_alloc_extent(trans, root, blocksize,
+       ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
                                 root_objectid, ref_generation,
                                 level, first_objectid, empty_size, hint,
                                 (u64)-1, &ins, 0);
@@ -1765,7 +1915,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        }
        btrfs_set_header_generation(buf, trans->transid);
        clean_tree_block(trans, root, buf);
-       wait_on_tree_block_writeback(root, buf);
        btrfs_set_buffer_uptodate(buf);
 
        if (PageDirty(buf->first_page)) {
@@ -1775,10 +1924,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 
        set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
-       set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
-                       buf->start, buf->start + buf->len - 1,
-                       EXTENT_CSUM, GFP_NOFS);
-       buf->flags |= EXTENT_CSUM;
        if (!btrfs_test_opt(root, SSD))
                btrfs_set_buffer_defrag(buf);
        trans->blocks_used++;
@@ -1939,11 +2084,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                if (!next || !btrfs_buffer_uptodate(next)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
+
                        mutex_unlock(&root->fs_info->fs_mutex);
                        next = read_tree_block(root, bytenr, blocksize);
                        mutex_lock(&root->fs_info->fs_mutex);
 
-                       /* we dropped the lock, check one more time */
+                       /* we've dropped the lock, double check */
                        ret = lookup_extent_ref(trans, root, bytenr,
                                                blocksize, &refs);
                        BUG_ON(ret);
@@ -1961,6 +2107,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                                BUG_ON(ret);
                                continue;
                        }
+               } else if (next) {
+                       btrfs_verify_block_csum(root, next);
                }
                WARN_ON(*level <= 0);
                if (path->nodes[*level-1])
@@ -2143,18 +2291,24 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        return 0;
 }
 
/*
 * Compute the last page index to read ahead: @nr pages starting at
 * @start, clamped to @last.
 *
 * Guards @nr == 0: the original start + nr - 1 would wrap (unsigned
 * underflow) and could extend readahead all the way to @last.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long end;

	if (nr == 0)
		return start < last ? start : last;
	end = start + nr - 1;
	return end < last ? end : last;
}
+
 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                                         u64 len)
 {
        u64 page_start;
        u64 page_end;
-       u64 delalloc_start;
-       u64 existing_delalloc;
        unsigned long last_index;
        unsigned long i;
        struct page *page;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct file_ra_state *ra;
+       unsigned long total_read = 0;
+       unsigned long ra_pages;
 
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
 
@@ -2162,11 +2316,16 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
        i = start >> PAGE_CACHE_SHIFT;
        last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 
+       ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
+
        file_ra_state_init(ra, inode->i_mapping);
-       btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
-       kfree(ra);
 
        for (; i <= last_index; i++) {
+               if (total_read % ra_pages == 0) {
+                       btrfs_force_ra(inode->i_mapping, ra, NULL, i,
+                                      calc_ra(i, last_index, ra_pages));
+               }
+               total_read++;
                page = grab_cache_page(inode->i_mapping, i);
                if (!page)
                        goto out_unlock;
@@ -2179,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                                goto out_unlock;
                        }
                }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+               ClearPageDirty(page);
+#else
+               cancel_dirty_page(page, PAGE_CACHE_SIZE);
+#endif
+               wait_on_page_writeback(page);
+               set_page_extent_mapped(page);
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
 
                lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
-               delalloc_start = page_start;
-               existing_delalloc = count_range_bits(io_tree,
-                                            &delalloc_start, page_end,
-                                            PAGE_CACHE_SIZE, EXTENT_DELALLOC);
-
+               set_page_dirty(page);
                set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
 
                unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-               set_page_dirty(page);
                unlock_page(page);
                page_cache_release(page);
+               balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
        }
 
 out_unlock:
+       kfree(ra);
        mutex_unlock(&inode->i_mutex);
        return 0;
 }
@@ -2263,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
                        goto out;
                }
                relocate_inode_pages(inode, ref_offset, extent_key->offset);
-               /* FIXME, data=ordered will help get rid of this */
-               filemap_fdatawrite(inode->i_mapping);
                iput(inode);
                mutex_lock(&extent_root->fs_info->fs_mutex);
        } else {
@@ -2352,15 +2513,58 @@ out:
        return ret;
 }
 
-int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
+static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
+{
+       u64 num_devices;
+       u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
+               BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+
+       num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
+       if (num_devices == 1) {
+               stripped |= BTRFS_BLOCK_GROUP_DUP;
+               stripped = flags & ~stripped;
+
+               /* turn raid0 into single device chunks */
+               if (flags & BTRFS_BLOCK_GROUP_RAID0)
+                       return stripped;
+
+               /* turn mirroring into duplication */
+               if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+                            BTRFS_BLOCK_GROUP_RAID10))
+                       return stripped | BTRFS_BLOCK_GROUP_DUP;
+               return flags;
+       } else {
+               /* they already had raid on here, just return */
+               if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
+                   (flags & BTRFS_BLOCK_GROUP_RAID1)) {
+               }
+               if (flags & stripped)
+                       return flags;
+
+               stripped |= BTRFS_BLOCK_GROUP_DUP;
+               stripped = flags & ~stripped;
+
+               /* switch duplicated blocks with raid1 */
+               if (flags & BTRFS_BLOCK_GROUP_DUP)
+                       return stripped | BTRFS_BLOCK_GROUP_RAID1;
+
+               /* turn single device chunks into raid0 */
+               return stripped | BTRFS_BLOCK_GROUP_RAID0;
+       }
+       return flags;
+}
+
+int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_path *path;
        u64 cur_byte;
        u64 total_found;
+       u64 shrink_last_byte;
+       u64 new_alloc_flags;
+       struct btrfs_block_group_cache *shrink_block_group;
        struct btrfs_fs_info *info = root->fs_info;
-       struct extent_io_tree *block_group_cache;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
@@ -2368,17 +2572,32 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
        int ret;
        int progress = 0;
 
-       btrfs_set_super_total_bytes(&info->super_copy, new_size);
-       clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
-                          GFP_NOFS);
-       block_group_cache = &info->block_group_cache;
+       shrink_block_group = btrfs_lookup_block_group(root->fs_info,
+                                                     shrink_start);
+       BUG_ON(!shrink_block_group);
+
+       shrink_last_byte = shrink_start + shrink_block_group->key.offset;
+
+       shrink_block_group->space_info->total_bytes -=
+               shrink_block_group->key.offset;
        path = btrfs_alloc_path();
        root = root->fs_info->extent_root;
        path->reada = 2;
 
 again:
+       if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
+               trans = btrfs_start_transaction(root, 1);
+               new_alloc_flags = update_block_group_flags(root,
+                                                  shrink_block_group->flags);
+               do_chunk_alloc(trans, root->fs_info->extent_root,
+                       btrfs_block_group_used(&shrink_block_group->item) +
+                       2 * 1024 * 1024, new_alloc_flags);
+               btrfs_end_transaction(trans, root);
+       }
+       shrink_block_group->ro = 1;
+
        total_found = 0;
-       key.objectid = new_size;
+       key.objectid = shrink_start;
        key.offset = 0;
        key.type = 0;
        cur_byte = key.objectid;
@@ -2390,10 +2609,12 @@ again:
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto out;
+
        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.objectid + found_key.offset > new_size) {
+               if (found_key.objectid + found_key.offset > shrink_start &&
+                   found_key.objectid < shrink_last_byte) {
                        cur_byte = found_key.objectid;
                        key.objectid = cur_byte;
                }
@@ -2422,6 +2643,9 @@ next:
 
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
+               if (found_key.objectid >= shrink_last_byte)
+                       break;
+
                if (progress && need_resched()) {
                        memcpy(&key, &found_key, sizeof(key));
                        mutex_unlock(&root->fs_info->fs_mutex);
@@ -2462,68 +2686,31 @@ next:
                goto again;
        }
 
+       /*
+        * we've freed all the extents, now remove the block
+        * group item from the tree
+        */
        trans = btrfs_start_transaction(root, 1);
-       key.objectid = new_size;
-       key.offset = 0;
-       key.type = 0;
-       while(1) {
-               u64 ptr;
+       memcpy(&key, &shrink_block_group->key, sizeof(key));
 
-               ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-               if (ret < 0)
-                       goto out;
-
-               leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
-bg_next:
-               if (path->slots[0] >= nritems) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
-                               break;
-                       if (ret == 1) {
-                               ret = 0;
-                               break;
-                       }
-                       leaf = path->nodes[0];
-                       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-                       /*
-                        * btrfs_next_leaf doesn't cow buffers, we have to
-                        * do the search again
-                        */
-                       memcpy(&key, &found_key, sizeof(key));
-                       btrfs_release_path(root, path);
-                       goto resched_check;
-               }
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0)
+               ret = -EIO;
+       if (ret < 0)
+               goto out;
 
-               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
-                       printk("shrinker found key %Lu %u %Lu\n",
-                               found_key.objectid, found_key.type,
-                               found_key.offset);
-                       path->slots[0]++;
-                       goto bg_next;
-               }
-               ret = get_state_private(&info->block_group_cache,
-                                       found_key.objectid, &ptr);
-               if (!ret)
-                       kfree((void *)(unsigned long)ptr);
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+       kfree(shrink_block_group);
 
-               clear_extent_bits(&info->block_group_cache, found_key.objectid,
-                                 found_key.objectid + found_key.offset - 1,
-                                 (unsigned int)-1, GFP_NOFS);
+       clear_extent_bits(&info->block_group_cache, found_key.objectid,
+                         found_key.objectid + found_key.offset - 1,
+                         (unsigned int)-1, GFP_NOFS);
 
-               key.objectid = found_key.objectid + 1;
-               btrfs_del_item(trans, root, path);
-               btrfs_release_path(root, path);
-resched_check:
-               if (need_resched()) {
-                       mutex_unlock(&root->fs_info->fs_mutex);
-                       cond_resched();
-                       mutex_lock(&root->fs_info->fs_mutex);
-               }
-       }
-       clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
+       btrfs_del_item(trans, root, path);
+       clear_extent_dirty(&info->free_space_cache,
+                          shrink_start, shrink_last_byte - 1,
                           GFP_NOFS);
        btrfs_commit_transaction(trans, root);
 out:
@@ -2531,13 +2718,6 @@ out:
        return ret;
 }
 
-int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 new_size)
-{
-       btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
-       return 0;
-}
-
 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
                           struct btrfs_key *key)
 {
@@ -2572,34 +2752,6 @@ error:
        return ret;
 }
 
-static int update_space_info(struct btrfs_fs_info *info, u64 flags,
-                            u64 total_bytes, u64 bytes_used,
-                            struct btrfs_space_info **space_info)
-{
-       struct btrfs_space_info *found;
-
-       found = __find_space_info(info, flags);
-       if (found) {
-               found->total_bytes += total_bytes;
-               found->bytes_used += bytes_used;
-               WARN_ON(found->total_bytes < found->bytes_used);
-               *space_info = found;
-               return 0;
-       }
-       found = kmalloc(sizeof(*found), GFP_NOFS);
-       if (!found)
-               return -ENOMEM;
-
-       list_add(&found->list, &info->space_info);
-       found->flags = flags;
-       found->total_bytes = total_bytes;
-       found->bytes_used = bytes_used;
-       found->bytes_pinned = 0;
-       found->full = 0;
-       *space_info = found;
-       return 0;
-}
-
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
        struct btrfs_path *path;
@@ -2633,7 +2785,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               cache = kmalloc(sizeof(*cache), GFP_NOFS);
+               cache = kzalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
                        ret = -ENOMEM;
                        break;
@@ -2643,8 +2795,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                                   btrfs_item_ptr_offset(leaf, path->slots[0]),
                                   sizeof(cache->item));
                memcpy(&cache->key, &found_key, sizeof(found_key));
-               cache->cached = 0;
-               cache->pinned = 0;
 
                key.objectid = found_key.objectid + found_key.offset;
                btrfs_release_path(root, path);
@@ -2657,6 +2807,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
                        bit = BLOCK_GROUP_METADATA;
                }
+               set_avail_alloc_bits(info, cache->flags);
 
                ret = update_space_info(info, cache->flags, found_key.offset,
                                        btrfs_block_group_used(&cache->item),
@@ -2683,7 +2834,7 @@ error:
 
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, u64 bytes_used,
-                          u64 type, u64 chunk_tree, u64 chunk_objectid,
+                          u64 type, u64 chunk_objectid, u64 chunk_offset,
                           u64 size)
 {
        int ret;
@@ -2695,16 +2846,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        extent_root = root->fs_info->extent_root;
        block_group_cache = &root->fs_info->block_group_cache;
 
-       cache = kmalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_NOFS);
        BUG_ON(!cache);
-       cache->key.objectid = chunk_objectid;
+       cache->key.objectid = chunk_offset;
        cache->key.offset = size;
-       cache->cached = 0;
-       cache->pinned = 0;
+
        btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
        memset(&cache->item, 0, sizeof(cache->item));
        btrfs_set_block_group_used(&cache->item, bytes_used);
-       btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
        btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
@@ -2713,19 +2862,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                                &cache->space_info);
        BUG_ON(ret);
 
-       if (type & BTRFS_BLOCK_GROUP_DATA) {
-               bit = BLOCK_GROUP_DATA;
-       } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
-               bit = BLOCK_GROUP_SYSTEM;
-       } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
-               bit = BLOCK_GROUP_METADATA;
-       }
-       set_extent_bits(block_group_cache, chunk_objectid,
-                       chunk_objectid + size - 1,
+       bit = block_group_state_bits(type);
+       set_extent_bits(block_group_cache, chunk_offset,
+                       chunk_offset + size - 1,
                        bit | EXTENT_LOCKED, GFP_NOFS);
-       set_state_private(block_group_cache, chunk_objectid,
-                         (unsigned long)cache);
 
+       set_state_private(block_group_cache, chunk_offset,
+                         (unsigned long)cache);
        ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
                                sizeof(cache->item));
        BUG_ON(ret);
@@ -2733,5 +2876,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        finish_current_insert(trans, extent_root);
        ret = del_pending_extents(trans, extent_root);
        BUG_ON(ret);
+       set_avail_alloc_bits(extent_root->fs_info, type);
        return 0;
 }
This page took 0.037035 seconds and 5 git commands to generate.