Merge branch 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Apr 2016 17:41:34 +0000 (10:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Apr 2016 17:41:34 +0000 (10:41 -0700)
Pull btrfs fixes from Chris Mason:
 "These are bug fixes, including a really old fsync bug, and a few trace
  points to help us track down problems in the quota code"

* 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix file/data loss caused by fsync after rename and new inode
  btrfs: Reset IO error counters before start of device replacing
  btrfs: Add qgroup tracing
  Btrfs: don't use src fd for printk
  btrfs: fallback to vmalloc in btrfs_compare_tree
  btrfs: handle non-fatal errors in btrfs_qgroup_inherit()
  btrfs: Output more info for enospc_debug mount option
  Btrfs: fix invalid reference in replace_path
  Btrfs: Improve FL_KEEP_SIZE handling in fallocate

fs/btrfs/ctree.c
fs/btrfs/dev-replace.c
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/relocation.c
fs/btrfs/tree-log.c
include/trace/events/btrfs.h

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 77592931ab4feba16615997f96d8beb9d385e858..ec7928a27aaad4c2241ef5b9dcd2048401861085 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
+#include <linux/vmalloc.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -5361,10 +5362,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
                goto out;
        }
 
-       tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL);
+       tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL | __GFP_NOWARN);
        if (!tmp_buf) {
-               ret = -ENOMEM;
-               goto out;
+               tmp_buf = vmalloc(left_root->nodesize);
+               if (!tmp_buf) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
        }
 
        left_path->search_commit_root = 1;
@@ -5565,7 +5569,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 out:
        btrfs_free_path(left_path);
        btrfs_free_path(right_path);
-       kfree(tmp_buf);
+       kvfree(tmp_buf);
        return ret;
 }
 
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a1d6652e0c4779ecf7e8c675fddcd06a560cdc32..26bcb487f95885295ad3e24d2a8063cbf6355351 100644 (file)
@@ -394,6 +394,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        dev_replace->cursor_right = 0;
        dev_replace->is_valid = 1;
        dev_replace->item_needs_writeback = 1;
+       atomic64_set(&dev_replace->num_write_errors, 0);
+       atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
        btrfs_dev_replace_unlock(dev_replace, 1);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ce114ba9780a3b51ceecaee1fdcdaa4baa769f2c..84e060eb0de8c6aca562bcfb537f5236a71e9e66 100644 (file)
@@ -9386,15 +9386,23 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        u64 dev_min = 1;
        u64 dev_nr = 0;
        u64 target;
+       int debug;
        int index;
        int full = 0;
        int ret = 0;
 
+       debug = btrfs_test_opt(root, ENOSPC_DEBUG);
+
        block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
 
        /* odd, couldn't find the block group, leave it alone */
-       if (!block_group)
+       if (!block_group) {
+               if (debug)
+                       btrfs_warn(root->fs_info,
+                                  "can't find block group for bytenr %llu",
+                                  bytenr);
                return -1;
+       }
 
        min_free = btrfs_block_group_used(&block_group->item);
 
@@ -9448,8 +9456,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                 * this is just a balance, so if we were marked as full
                 * we know there is no space for a new chunk
                 */
-               if (full)
+               if (full) {
+                       if (debug)
+                               btrfs_warn(root->fs_info,
+                                       "no space to alloc new chunk for block group %llu",
+                                       block_group->key.objectid);
                        goto out;
+               }
 
                index = get_block_group_index(block_group);
        }
@@ -9496,6 +9509,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                        ret = -1;
                }
        }
+       if (debug && ret == -1)
+               btrfs_warn(root->fs_info,
+                       "no space to allocate a new chunk for block group %llu",
+                       block_group->key.objectid);
        mutex_unlock(&root->fs_info->chunk_mutex);
        btrfs_end_transaction(trans, root);
 out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fbe2589f99f0c3ce8da9fd7839146beafc6a8408..8d7b5a45c00523f4ca7ee5d58810b9d571ad4148 100644 (file)
@@ -2682,9 +2682,12 @@ static long btrfs_fallocate(struct file *file, int mode,
                return ret;
 
        inode_lock(inode);
-       ret = inode_newsize_ok(inode, alloc_end);
-       if (ret)
-               goto out;
+
+       if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
+               ret = inode_newsize_ok(inode, offset + len);
+               if (ret)
+                       goto out;
+       }
 
        /*
         * TODO: Move these two operations after we have checked
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 94a0c8a3e8717b72f50bcf119d3e3b152203c973..5a23806ae418af8e3952b4cbf65df06aae384b57 100644 (file)
@@ -1654,7 +1654,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 
                src_inode = file_inode(src.file);
                if (src_inode->i_sb != file_inode(file)->i_sb) {
-                       btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+                       btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
                                   "Snapshot src from another FS");
                        ret = -EXDEV;
                } else if (!inode_owner_or_capable(src_inode)) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5279fdae7142fbe3177a556a020ed1af3a7aa8f1..9e119552ed32cb4236eb3933fb9e7e9d104b77af 100644 (file)
@@ -1463,6 +1463,7 @@ struct btrfs_qgroup_extent_record
        u64 bytenr = record->bytenr;
 
        assert_spin_locked(&delayed_refs->lock);
+       trace_btrfs_qgroup_insert_dirty_extent(record);
 
        while (*p) {
                parent_node = *p;
@@ -1594,6 +1595,9 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
                cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
                cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
 
+               trace_qgroup_update_counters(qg->qgroupid, cur_old_count,
+                                            cur_new_count);
+
                /* Rfer update part */
                if (cur_old_count == 0 && cur_new_count > 0) {
                        qg->rfer += num_bytes;
@@ -1683,6 +1687,9 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
                goto out_free;
        BUG_ON(!fs_info->quota_root);
 
+       trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots,
+                                         nr_new_roots);
+
        qgroups = ulist_alloc(GFP_NOFS);
        if (!qgroups) {
                ret = -ENOMEM;
@@ -1752,6 +1759,8 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
                record = rb_entry(node, struct btrfs_qgroup_extent_record,
                                  node);
 
+               trace_btrfs_qgroup_account_extents(record);
+
                if (!ret) {
                        /*
                         * Use (u64)-1 as time_seq to do special search, which
@@ -1842,8 +1851,10 @@ out:
 }
 
 /*
- * copy the acounting information between qgroups. This is necessary when a
- * snapshot or a subvolume is created
+ * Copy the accounting information between qgroups. This is necessary
+ * when a snapshot or a subvolume is created. Throwing an error will
+ * cause a transaction abort so we take extra care here to only error
+ * when a readonly fs is a reasonable outcome.
  */
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
@@ -1873,15 +1884,15 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                       2 * inherit->num_excl_copies;
                for (i = 0; i < nums; ++i) {
                        srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
-                       if (!srcgroup) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
 
-                       if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
+                       /*
+                        * Zero out invalid groups so we can ignore
+                        * them later.
+                        */
+                       if (!srcgroup ||
+                           ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
+                               *i_qgroups = 0ULL;
+
                        ++i_qgroups;
                }
        }
@@ -1916,17 +1927,19 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
         */
        if (inherit) {
                i_qgroups = (u64 *)(inherit + 1);
-               for (i = 0; i < inherit->num_qgroups; ++i) {
+               for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
+                       if (*i_qgroups == 0)
+                               continue;
                        ret = add_qgroup_relation_item(trans, quota_root,
                                                       objectid, *i_qgroups);
-                       if (ret)
+                       if (ret && ret != -EEXIST)
                                goto out;
                        ret = add_qgroup_relation_item(trans, quota_root,
                                                       *i_qgroups, objectid);
-                       if (ret)
+                       if (ret && ret != -EEXIST)
                                goto out;
-                       ++i_qgroups;
                }
+               ret = 0;
        }
 
 
@@ -1987,17 +2000,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 
        i_qgroups = (u64 *)(inherit + 1);
        for (i = 0; i < inherit->num_qgroups; ++i) {
-               ret = add_relation_rb(quota_root->fs_info, objectid,
-                                     *i_qgroups);
-               if (ret)
-                       goto unlock;
+               if (*i_qgroups) {
+                       ret = add_relation_rb(quota_root->fs_info, objectid,
+                                             *i_qgroups);
+                       if (ret)
+                               goto unlock;
+               }
                ++i_qgroups;
        }
 
-       for (i = 0; i <  inherit->num_ref_copies; ++i) {
+       for (i = 0; i <  inherit->num_ref_copies; ++i, i_qgroups += 2) {
                struct btrfs_qgroup *src;
                struct btrfs_qgroup *dst;
 
+               if (!i_qgroups[0] || !i_qgroups[1])
+                       continue;
+
                src = find_qgroup_rb(fs_info, i_qgroups[0]);
                dst = find_qgroup_rb(fs_info, i_qgroups[1]);
 
@@ -2008,12 +2026,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 
                dst->rfer = src->rfer - level_size;
                dst->rfer_cmpr = src->rfer_cmpr - level_size;
-               i_qgroups += 2;
        }
-       for (i = 0; i <  inherit->num_excl_copies; ++i) {
+       for (i = 0; i <  inherit->num_excl_copies; ++i, i_qgroups += 2) {
                struct btrfs_qgroup *src;
                struct btrfs_qgroup *dst;
 
+               if (!i_qgroups[0] || !i_qgroups[1])
+                       continue;
+
                src = find_qgroup_rb(fs_info, i_qgroups[0]);
                dst = find_qgroup_rb(fs_info, i_qgroups[1]);
 
@@ -2024,7 +2044,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 
                dst->excl = src->excl + level_size;
                dst->excl_cmpr = src->excl_cmpr + level_size;
-               i_qgroups += 2;
        }
 
 unlock:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3c93968b539dd16a32e3d3c171c9c8b46d9ab4f7..08ef890deca69fdce7a35d26bcf5ae45d9f67d8b 100644 (file)
@@ -1850,6 +1850,7 @@ again:
                        eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
                        if (IS_ERR(eb)) {
                                ret = PTR_ERR(eb);
+                               break;
                        } else if (!extent_buffer_uptodate(eb)) {
                                ret = -EIO;
                                free_extent_buffer(eb);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 24d03c751149f56e484e32c3b1120dceb495b299..517d0ccb351e205a4f415c15c4735c3a76647b91 100644 (file)
@@ -4415,6 +4415,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * When we are logging a new inode X, check if it doesn't have a reference that
+ * matches the reference from some other inode Y created in a past transaction
+ * and that was renamed in the current transaction. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x                 # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After the log replay procedure, we would lose the first directory and all its
+ * files (file foobar).
+ * For the case where inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
+ * the same transaction.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+                                        const int slot,
+                                        const struct btrfs_key *key,
+                                        struct inode *inode)
+{
+       int ret;
+       struct btrfs_path *search_path;
+       char *name = NULL;
+       u32 name_len = 0;
+       u32 item_size = btrfs_item_size_nr(eb, slot);
+       u32 cur_offset = 0;
+       unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+       search_path = btrfs_alloc_path();
+       if (!search_path)
+               return -ENOMEM;
+       search_path->search_commit_root = 1;
+       search_path->skip_locking = 1;
+
+       while (cur_offset < item_size) {
+               u64 parent;
+               u32 this_name_len;
+               u32 this_len;
+               unsigned long name_ptr;
+               struct btrfs_dir_item *di;
+
+               if (key->type == BTRFS_INODE_REF_KEY) {
+                       struct btrfs_inode_ref *iref;
+
+                       iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+                       parent = key->offset;
+                       this_name_len = btrfs_inode_ref_name_len(eb, iref);
+                       name_ptr = (unsigned long)(iref + 1);
+                       this_len = sizeof(*iref) + this_name_len;
+               } else {
+                       struct btrfs_inode_extref *extref;
+
+                       extref = (struct btrfs_inode_extref *)(ptr +
+                                                              cur_offset);
+                       parent = btrfs_inode_extref_parent(eb, extref);
+                       this_name_len = btrfs_inode_extref_name_len(eb, extref);
+                       name_ptr = (unsigned long)&extref->name;
+                       this_len = sizeof(*extref) + this_name_len;
+               }
+
+               if (this_name_len > name_len) {
+                       char *new_name;
+
+                       new_name = krealloc(name, this_name_len, GFP_NOFS);
+                       if (!new_name) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       name_len = this_name_len;
+                       name = new_name;
+               }
+
+               read_extent_buffer(eb, name, name_ptr, this_name_len);
+               di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+                                          search_path, parent,
+                                          name, this_name_len, 0);
+               if (di && !IS_ERR(di)) {
+                       ret = 1;
+                       goto out;
+               } else if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+                       goto out;
+               }
+               btrfs_release_path(search_path);
+
+               cur_offset += this_len;
+       }
+       ret = 0;
+out:
+       btrfs_free_path(search_path);
+       kfree(name);
+       return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4602,6 +4723,22 @@ again:
                if (min_key.type == BTRFS_INODE_ITEM_KEY)
                        need_log_inode_item = false;
 
+               if ((min_key.type == BTRFS_INODE_REF_KEY ||
+                    min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+                   BTRFS_I(inode)->generation == trans->transid) {
+                       ret = btrfs_check_ref_name_override(path->nodes[0],
+                                                           path->slots[0],
+                                                           &min_key, inode);
+                       if (ret < 0) {
+                               err = ret;
+                               goto out_unlock;
+                       } else if (ret > 0) {
+                               err = 1;
+                               btrfs_set_log_full_commit(root->fs_info, trans);
+                               goto out_unlock;
+                       }
+               }
+
                /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
                if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
                        if (ins_nr == 0)
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 677807f29a1cd18ae142f0f6e95999b0b6e1125e..e90e82ad68754cc8776935961275631507695ac4 100644 (file)
@@ -23,7 +23,7 @@ struct map_lookup;
 struct extent_buffer;
 struct btrfs_work;
 struct __btrfs_workqueue;
-struct btrfs_qgroup_operation;
+struct btrfs_qgroup_extent_record;
 
 #define show_ref_type(type)                                            \
        __print_symbolic(type,                                          \
@@ -1231,6 +1231,93 @@ DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
 
        TP_ARGS(ref_root, reserved)
 );
+
+DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
+       TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+       TP_ARGS(rec),
+
+       TP_STRUCT__entry(
+               __field(        u64,  bytenr            )
+               __field(        u64,  num_bytes         )
+       ),
+
+       TP_fast_assign(
+               __entry->bytenr         = rec->bytenr,
+               __entry->num_bytes      = rec->num_bytes;
+       ),
+
+       TP_printk("bytenr = %llu, num_bytes = %llu",
+                 (unsigned long long)__entry->bytenr,
+                 (unsigned long long)__entry->num_bytes)
+);
+
+DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,
+
+       TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+       TP_ARGS(rec)
+);
+
+DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_insert_dirty_extent,
+
+       TP_PROTO(struct btrfs_qgroup_extent_record *rec),
+
+       TP_ARGS(rec)
+);
+
+TRACE_EVENT(btrfs_qgroup_account_extent,
+
+       TP_PROTO(u64 bytenr, u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
+
+       TP_ARGS(bytenr, num_bytes, nr_old_roots, nr_new_roots),
+
+       TP_STRUCT__entry(
+               __field(        u64,  bytenr                    )
+               __field(        u64,  num_bytes                 )
+               __field(        u64,  nr_old_roots              )
+               __field(        u64,  nr_new_roots              )
+       ),
+
+       TP_fast_assign(
+               __entry->bytenr         = bytenr;
+               __entry->num_bytes      = num_bytes;
+               __entry->nr_old_roots   = nr_old_roots;
+               __entry->nr_new_roots   = nr_new_roots;
+       ),
+
+       TP_printk("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
+                 "nr_new_roots = %llu",
+                 __entry->bytenr,
+                 __entry->num_bytes,
+                 __entry->nr_old_roots,
+                 __entry->nr_new_roots)
+);
+
+TRACE_EVENT(qgroup_update_counters,
+
+       TP_PROTO(u64 qgid, u64 cur_old_count, u64 cur_new_count),
+
+       TP_ARGS(qgid, cur_old_count, cur_new_count),
+
+       TP_STRUCT__entry(
+               __field(        u64,  qgid                      )
+               __field(        u64,  cur_old_count             )
+               __field(        u64,  cur_new_count             )
+       ),
+
+       TP_fast_assign(
+               __entry->qgid           = qgid;
+               __entry->cur_old_count  = cur_old_count;
+               __entry->cur_new_count  = cur_new_count;
+       ),
+
+       TP_printk("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+                 __entry->qgid,
+                 __entry->cur_old_count,
+                 __entry->cur_new_count)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
This page took 0.039652 seconds and 5 git commands to generate.