Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jan 2016 23:46:49 +0000 (15:46 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Jan 2016 23:46:49 +0000 (15:46 -0800)
Pull btrfs fixes from Chris Mason:
 "Dave had a small collection of fixes to the new free space tree code,
  one of which was keeping our sysfs files more up to date with feature
  bits as different things get enabled (lzo, raid5/6, etc).

  I should have kept the sysfs stuff for rc3, since we always manage to
  trip over something.  This time it was GFP_KERNEL from somewhere that
  is NOFS only.  Instead of rebasing it out I've put a revert in, and
  we'll fix it properly for rc3.

  Otherwise, Filipe fixed a btrfs DIO race and Qu Wenruo fixed up a
  use-after-free in our tracepoints that Dave Jones reported"

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Revert "btrfs: synchronize incompat feature bits with sysfs files"
  btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc
  btrfs: sysfs: check initialization state before updating features
  Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
  btrfs: async-thread: Fix a use-after-free error for trace
  Btrfs: fix race between fsync and lockless direct IO writes
  btrfs: add free space tree to the cow-only list
  btrfs: add free space tree to lockdep classes
  btrfs: tweak free space tree bitmap allocation
  btrfs: tests: switch to GFP_KERNEL
  btrfs: synchronize incompat feature bits with sysfs files
  btrfs: sysfs: introduce helper for syncing bits with sysfs files
  btrfs: sysfs: add free-space-tree bit attribute
  btrfs: sysfs: fix typo in compat_ro attribute definition

fs/btrfs/async-thread.c
fs/btrfs/disk-io.c
fs/btrfs/free-space-tree.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-log.c

index 88d9af3d4581f7c45aa0065fa467d097625be5c1..5fb60ea7eee2b2c28e067d11b54c2fad7127c7d8 100644 (file)
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
-       queue_work(wq->normal_wq, &work->normal_work);
        trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
index dd08e29f51177d27859c12f0d173224a24a1f4a9..4545e2e2ad45a556113eecf30b1a7d49ee7ca260 100644 (file)
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
        { .id = BTRFS_UUID_TREE_OBJECTID,       .name_stem = "uuid"     },
+       { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
        { .id = 0,                              .name_stem = "tree"     },
 };
 
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
        int again;
        struct btrfs_trans_handle *trans;
 
-       set_freezable();
        do {
                again = 0;
 
index 393e36bd5845d996d216e14ef2c6ac6866fce746..53dbeaf6ce941cc7a6a8bf06c6a81df354e9e364 100644 (file)
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+       void *mem;
+
+       /*
+        * The allocation size varies, observed numbers were < 4K up to 16K.
+        * Using vmalloc unconditionally would be too heavy, we'll try
+        * contiguous allocations first.
+        */
+       if  (bitmap_size <= PAGE_SIZE)
+               return kzalloc(bitmap_size, GFP_NOFS);
+
+       mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+       if (mem)
+               return mem;
+
        return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
index e28f3d4691afee8bf6dcd68d0617a27e07439e82..5f06eb1f43843055c0373daeb9ad98648865150f 100644 (file)
@@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
 
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply can not rely on inode_dio_wait() because it
+        * causes deadlock with AIO.
+        */
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                return ERR_PTR(ret);
        }
 
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
        return em;
 }
 
index fd1c4d982463fd92768537f86f8b9ae8545befba..2bd0011450df2715ca926a6ccd2d99cb7d40f8b4 100644 (file)
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
            root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-           root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+           root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+           root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                return 1;
        return 0;
 }
index e0ac85949067c30191ce3635e2a65edbe8a56361..539e7b5e3f86a83059f070409fdb36749a958ce4 100644 (file)
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(raid56),
        BTRFS_FEAT_ATTR_PTR(skinny_metadata),
        BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
        NULL
 };
 
@@ -780,6 +782,39 @@ failure:
        return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value, ideally we'd like
+        * to use sysfs_update_group but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
index 9c09522125a6b26d0577e139c38ebad7ca5c8331..d7da1a4c2f6c12d04a8f29e4d8f503943058d46d 100644 (file)
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
index b1d920b3007017c2014d28217cb5efa1d7ed4d96..0e1e61a7ec23265d292e165fd39f1f8d2080f604 100644 (file)
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                               GFP_NOFS);
+                                               GFP_KERNEL);
 
        if (!fs_info)
                return fs_info;
        fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->fs_devices) {
                kfree(fs_info);
                return NULL;
        }
        fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->super_copy) {
                kfree(fs_info->fs_devices);
                kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
        struct btrfs_block_group_cache *cache;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
+                                       GFP_KERNEL);
        if (!cache->free_space_ctl) {
                kfree(cache);
                return NULL;
index e29fa297e053951a2d2d178a8793c9d49e021023..669b58201e36881fb1434d76fd787a45329f042e 100644 (file)
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
         * test.
         */
        for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                if (!page) {
                        test_msg("Failed to allocate test page\n");
                        ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
         * |--- delalloc ---|
         * |---  search  ---|
         */
-       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
        start = 0;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
                test_msg("Couldn't find the locked page\n");
                goto out_bits;
        }
-       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
         *
         * We are re-using our test_start from above since it works out well.
         */
-       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
        }
        ret = 0;
 out_bits:
-       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
        if (locked_page)
                page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
        test_msg("Running extent buffer bitmap tests\n");
 
-       bitmap = kmalloc(len, GFP_NOFS);
+       bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_msg("Couldn't allocate test bitmap\n");
                return -ENOMEM;
index 5de55fdd28bcda6aca3d0a2becf5bed8e96f1b63..e2d3da02deee4b73b9f9c96ed1e6cc9f4c1b5700 100644 (file)
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                               EXTENT_DELALLOC | EXTENT_DIRTY |
                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
                               BTRFS_MAX_EXTENT_SIZE+8191,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1096,7 +1096,7 @@ out:
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                                NULL, GFP_NOFS);
+                                NULL, GFP_KERNEL);
        iput(inode);
        btrfs_free_dummy_root(root);
        return ret;
index 323e12cc9d2f522388fe929ed66d4dd946b5881d..978c3a8108936381309de4681e90400aeb86874f 100644 (file)
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items without waiting for the disk
+        * location they point to being written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                        goto out_unlock;
                }
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                if (ret) {
                        err = ret;
                        goto out_unlock;
This page took 0.040508 seconds and 5 git commands to generate.