xfs: optimise away log forces on timestamp updates for fdatasync
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index de2c2376242bc3fe464fc759a097cb64158b9435..c94699cbc667fd64954ca3ea2d2fe100e872a1c9 100644
@@ -242,19 +242,30 @@ xfs_file_fsync(
        }
 
        /*
-        * All metadata updates are logged, which means that we just have
-        * to flush the log up to the latest LSN that touched the inode.
+        * All metadata updates are logged, which means that we just have to
+        * flush the log up to the latest LSN that touched the inode. If we have
+        * concurrent fsync/fdatasync() calls, we need them to all block on the
+        * log force before we clear the ili_fsync_fields field. This ensures
+        * that we don't get a racing sync operation that does not wait for the
+        * metadata to hit the journal before returning. If we race with
+        * clearing the ili_fsync_fields, then all that will happen is the log
+        * force will do nothing as the lsn will already be on disk. We can't
+        * race with setting ili_fsync_fields because that is done under
+        * XFS_ILOCK_EXCL, which cannot be taken while we hold the lock
+        * shared here until after ili_fsync_fields is cleared.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        if (xfs_ipincount(ip)) {
                if (!datasync ||
-                   (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+                   (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
                        lsn = ip->i_itemp->ili_last_lsn;
        }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-       if (lsn)
+       if (lsn) {
                error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+               ip->i_itemp->ili_fsync_fields = 0;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        /*
         * If we only have a single device, and the log force above was
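
The hunk above only shows the consumer side of the optimisation. The
companion change (elided on this page) has to accumulate ili_fsync_fields
whenever a transaction dirties the inode. Below is a minimal sketch of
that setting side, assuming it lives in xfs_trans_log_inode() as the
comment above implies; treat it as illustrative rather than the exact
upstream hunk:

	void
	xfs_trans_log_inode(
		struct xfs_trans	*tp,
		struct xfs_inode	*ip,
		uint			flags)
	{
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

		/*
		 * Record which fields this transaction dirties so that
		 * fdatasync() can tell timestamp-only updates apart and
		 * skip the log force for them. This runs under
		 * XFS_ILOCK_EXCL, which is why holding XFS_ILOCK_SHARED
		 * in xfs_file_fsync() is enough to serialise against it.
		 */
		ip->i_itemp->ili_fsync_fields |= flags;

		/* ... existing transaction/log item dirtying ... */
	}
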
@@ -1546,8 +1557,36 @@ xfs_filemap_fault(
        return ret;
 }
 
+STATIC int
+xfs_filemap_pmd_fault(
+       struct vm_area_struct   *vma,
+       unsigned long           addr,
+       pmd_t                   *pmd,
+       unsigned int            flags)
+{
+       struct inode            *inode = file_inode(vma->vm_file);
+       struct xfs_inode        *ip = XFS_I(inode);
+       int                     ret;
+
+       if (!IS_DAX(inode))
+               return VM_FAULT_FALLBACK;
+
+       trace_xfs_filemap_pmd_fault(ip);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct,
+                                   xfs_end_io_dax_write);
+       xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       sb_end_pagefault(inode->i_sb);
+
+       return ret;
+}
+
 static const struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = xfs_filemap_fault,
+       .pmd_fault      = xfs_filemap_pmd_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
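
The trace_xfs_filemap_pmd_fault() call in the new handler implies a
matching tracepoint definition elsewhere in the patch. It would
presumably be declared like the other XFS inode events in
fs/xfs/xfs_trace.h (that hunk is not shown on this page):

	DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);

Note that returning VM_FAULT_FALLBACK, either for non-DAX inodes or when
__dax_pmd_fault() cannot establish a huge mapping, makes the core mm
retry the fault through the regular .fault path, so xfs_filemap_fault()
remains the PTE-granularity fallback.
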
@@ -1560,7 +1599,7 @@ xfs_file_mmap(
        file_accessed(filp);
        vma->vm_ops = &xfs_file_vm_ops;
        if (IS_DAX(file_inode(filp)))
-               vma->vm_flags |= VM_MIXEDMAP;
+               vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        return 0;
 }
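
To see what the VM_HUGEPAGE flag enables, here is a hedged userspace
sketch: on an XFS filesystem mounted with -o dax, a 2MB-sized and
2MB-aligned mapping can now be populated by a single PMD fault through
xfs_filemap_pmd_fault() rather than 512 PTE-sized faults. The mount
point and file name are illustrative:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define PMD_SIZE	(2UL * 1024 * 1024)	/* x86-64 huge page size */

	int main(void)
	{
		/* Illustrative path: any file on an XFS DAX mount. */
		int fd = open("/mnt/dax/testfile", O_RDWR | O_CREAT, 0644);

		if (fd < 0 || ftruncate(fd, PMD_SIZE) < 0) {
			perror("open/ftruncate");
			return 1;
		}

		/*
		 * MAP_SHARED mapping of a DAX file: the kernel sets
		 * VM_MIXEDMAP and, after this patch, VM_HUGEPAGE on the
		 * VMA. If both the file extent and the virtual address
		 * happen to be 2MB aligned, the first store below takes
		 * one ->pmd_fault instead of 512 ordinary page faults.
		 */
		char *p = mmap(NULL, PMD_SIZE, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		memset(p, 0xaa, PMD_SIZE);	/* fault in the region */

		munmap(p, PMD_SIZE);
		close(fd);
		return 0;
	}
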
 