xfs: xfs_quiesce_attr() should quiesce the log like unmount

[deliverable/linux.git] / fs / xfs / xfs_super.c
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 20fa955d80d1f6e3982964a72e36b135ad229304..fdedf2cabae3a3936de1c4c4fe1525ccdadda9bd 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -863,8 +863,23 @@ xfs_init_mount_workqueues(
                         WQ_MEM_RECLAIM, 0, mp->m_fsname);
         if (!mp->m_cil_workqueue)
                 goto out_destroy_unwritten;
+
+       mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
+                       WQ_NON_REENTRANT, 0, mp->m_fsname);
+       if (!mp->m_reclaim_workqueue)
+               goto out_destroy_cil;
+
+       mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
+                       WQ_NON_REENTRANT, 0, mp->m_fsname);
+       if (!mp->m_log_workqueue)
+               goto out_destroy_reclaim;
+
         return 0;
  
+out_destroy_reclaim:
+       destroy_workqueue(mp->m_reclaim_workqueue);
+out_destroy_cil:
+       destroy_workqueue(mp->m_cil_workqueue);
  out_destroy_unwritten:
         destroy_workqueue(mp->m_unwritten_workqueue);
  out_destroy_data_iodone_queue:
@@ -877,11 +892,31 @@ STATIC void
  xfs_destroy_mount_workqueues(
         struct xfs_mount        *mp)
  {
+       destroy_workqueue(mp->m_log_workqueue);
+       destroy_workqueue(mp->m_reclaim_workqueue);
         destroy_workqueue(mp->m_cil_workqueue);
         destroy_workqueue(mp->m_data_workqueue);
         destroy_workqueue(mp->m_unwritten_workqueue);
  }
  
+/*
+ * Write back all dirty data on this mount. Must not be called while holding an
+ * XFS_ILOCK or a page lock. sync_inodes_sb() is used so that we block until the
+ * IO completes, which throttles multiple callers to the rate at which IO is
+ * completing. If s_umount cannot be taken for read, nothing is flushed.
+ */
+void
+xfs_flush_inodes(
+       struct xfs_mount        *mp)
+{
+       struct super_block      *sb = mp->m_super;
+
+       if (!down_read_trylock(&sb->s_umount))
+               return;
+       sync_inodes_sb(sb);
+       up_read(&sb->s_umount);
+}
+
  /* Catch misguided souls that try to use this interface on XFS */
  STATIC struct inode *
  xfs_fs_alloc_inode(
@@ -1005,9 +1040,6 @@ xfs_fs_put_super(
  {
         struct xfs_mount        *mp = XFS_M(sb);
  
-       cancel_delayed_work_sync(&mp->m_sync_work);
-       cancel_work_sync(&mp->m_flush_work);
-
         xfs_filestream_unmount(mp);
         xfs_unmountfs(mp);
  
@@ -1025,7 +1057,6 @@ xfs_fs_sync_fs(
         int                     wait)
  {
         struct xfs_mount        *mp = XFS_M(sb);
-       int                     error;
  
         /*
          * Doing anything during the async pass would be counterproductive.
@@ -1033,17 +1064,14 @@ xfs_fs_sync_fs(
         if (!wait)
                 return 0;
  
-       error = xfs_quiesce_data(mp);
-       if (error)
-               return -error;
-
+       xfs_log_force(mp, XFS_LOG_SYNC);
         if (laptop_mode) {
                 /*
                  * The disk must be active because we're syncing.
-                * We schedule xfssyncd now (now that the disk is
+                * We schedule log work now (now that the disk is
                  * active) instead of later (when it might not be).
                  */
-               flush_delayed_work(&mp->m_sync_work);
+               flush_delayed_work(&mp->m_log->l_work);
         }
  
         return 0;
@@ -1120,6 +1148,48 @@ xfs_restore_resvblks(struct xfs_mount *mp)
         xfs_reserve_blocks(mp, &resblks, NULL);
  }
  
+/*
+ * Trigger writeback of all the dirty metadata in the file system.
+ *
+ * This ensures that the metadata is written to its final location on disk
+ * rather than just existing in transactions in the log. This means after a
+ * quiesce there is no log replay required to write the inodes to disk - this
+ * is the primary difference between a sync and a quiesce.
+ *
+ * Note: xfs_log_quiesce() stops background log work - the callers must ensure
+ * it is started again when appropriate.
+ */
+void
+xfs_quiesce_attr(
+       struct xfs_mount        *mp)
+{
+       int     error;
+
+       /* wait for all modifications to complete */
+       while (atomic_read(&mp->m_active_trans) > 0)
+               delay(100);
+
+       /* force the log to unpin objects from the now complete transactions */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /* reclaim inodes to do any IO before the freeze completes */
+       xfs_reclaim_inodes(mp, 0);
+       xfs_reclaim_inodes(mp, SYNC_WAIT);
+
+       /* Log the superblock changes; a failure is reported but not fatal */
+       error = xfs_log_sbcount(mp);
+       if (error)
+               xfs_warn(mp, "xfs_quiesce_attr: failed to log sb changes. "
+                               "Frozen image may not be consistent.");
+       /*
+        * Just warn here till VFS can correctly support
+        * read-only remount without racing.
+        */
+       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+       xfs_log_quiesce(mp);
+}
+
  STATIC int
  xfs_fs_remount(
         struct super_block      *sb,
@@ -1200,21 +1270,18 @@ xfs_fs_remount(
                  * value if it is non-zero, otherwise go with the default.
                  */
                 xfs_restore_resvblks(mp);
-               xfs_syncd_queue_sync(mp);
+               xfs_log_work_queue(mp);
         }
  
         /* rw -> ro */
         if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
                 /*
-                * After we have synced the data but before we sync the
-                * metadata, we need to free up the reserve block pool so that
-                * the used block count in the superblock on disk is correct at
-                * the end of the remount. Stash the current reserve pool size
-                * so that if we get remounted rw, we can return it to the same
-                * size.
+                * Before we sync the metadata, we need to free up the reserve
+                * block pool so that the used block count in the superblock on
+                * disk is correct at the end of the remount. Stash the current
+                * reserve pool size so that if we get remounted rw, we can
+                * return it to the same size.
                  */
-
-               xfs_quiesce_data(mp);
                 xfs_save_resvblks(mp);
                 xfs_quiesce_attr(mp);
                 mp->m_flags |= XFS_MOUNT_RDONLY;
@@ -1246,7 +1313,7 @@ xfs_fs_unfreeze(
         struct xfs_mount        *mp = XFS_M(sb);
  
         xfs_restore_resvblks(mp);
-       xfs_syncd_queue_sync(mp);
+       xfs_log_work_queue(mp);
         return 0;
  }
  
@@ -1325,8 +1392,6 @@ xfs_fs_fill_super(
         spin_lock_init(&mp->m_sb_lock);
         mutex_init(&mp->m_growlock);
         atomic_set(&mp->m_active_trans, 0);
-       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
  
         mp->m_super = sb;
@@ -1378,10 +1443,6 @@ xfs_fs_fill_super(
         /*
          * we must configure the block size in the superblock before we run the
          * full mount process as the mount process can lookup and cache inodes.
-        * For the same reason we must also initialise the syncd and register
-        * the inode cache shrinker so that inodes can be reclaimed during
-        * operations like a quotacheck that iterate all inodes in the
-        * filesystem.
          */
         sb->s_magic = XFS_SB_MAGIC;
         sb->s_blocksize = mp->m_sb.sb_blocksize;
@@ -1410,12 +1471,6 @@ xfs_fs_fill_super(
                 goto out_unmount;
         }
  
-       /*
-        * The filesystem is successfully mounted, so we can start background
-        * sync work now.
-        */
-       xfs_syncd_queue_sync(mp);
-
         return 0;
  
   out_filestream_unmount:
@@ -1631,16 +1686,6 @@ xfs_destroy_zones(void)
  STATIC int __init
  xfs_init_workqueues(void)
  {
-       /*
-        * We never want to the same work item to run twice, reclaiming inodes
-        * or idling the log is not going to get any faster by multiple CPUs
-        * competing for ressources.  Use the default large max_active value
-        * so that even lots of filesystems can perform these task in parallel.
-        */
-       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
-       if (!xfs_syncd_wq)
-               return -ENOMEM;
-
         /*
          * The allocation workqueue can be used in memory reclaim situations
          * (writepage path), and parallelism is only limited by the number of
@@ -1649,20 +1694,15 @@ xfs_init_workqueues(void)
          */
         xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
         if (!xfs_alloc_wq)
-               goto out_destroy_syncd;
+               return -ENOMEM;
  
         return 0;
-
-out_destroy_syncd:
-       destroy_workqueue(xfs_syncd_wq);
-       return -ENOMEM;
  }
  
  STATIC void
  xfs_destroy_workqueues(void)
  {
         destroy_workqueue(xfs_alloc_wq);
-       destroy_workqueue(xfs_syncd_wq);
  }
  
  STATIC int __init