xfs: optimise away log forces on timestamp updates for fdatasync
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index db4acc1c3e73479cdf32322944a7fa1bcf34b96f..c94699cbc667fd64954ca3ea2d2fe100e872a1c9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -242,19 +242,30 @@ xfs_file_fsync(
        }
 
        /*
-        * All metadata updates are logged, which means that we just have
-        * to flush the log up to the latest LSN that touched the inode.
+        * All metadata updates are logged, which means that we just have to
+        * flush the log up to the latest LSN that touched the inode. If we have
+        * concurrent fsync/fdatasync() calls, we need them to all block on the
+        * log force before we clear the ili_fsync_fields field. This ensures
+        * that we don't get a racing sync operation that does not wait for the
+        * metadata to hit the journal before returning. If we race with
+        * clearing the ili_fsync_fields, then all that will happen is the log
+        * force will do nothing as the lsn will already be on disk. We can't
+        * race with setting ili_fsync_fields because that is done under
+        * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+        * until after the ili_fsync_fields is cleared.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        if (xfs_ipincount(ip)) {
                if (!datasync ||
-                   (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
+                   (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
                        lsn = ip->i_itemp->ili_last_lsn;
        }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-       if (lsn)
+       if (lsn) {
                error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+               ip->i_itemp->ili_fsync_fields = 0;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        /*
         * If we only have a single device, and the log force above was
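
This hunk only shows the consuming side of ili_fsync_fields. The comment relies on the field being accumulated while a transaction logs the inode under XFS_ILOCK_EXCL; that setting side is not part of this hunk, so the snippet below is only an illustrative sketch of what it would look like (the function name and exact placement are assumptions, not taken from this diff):

STATIC void
example_log_inode_fields(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        uint                    flags)
{
        /*
         * Sketch only: this runs under XFS_ILOCK_EXCL, so it cannot race
         * with the clearing done in xfs_file_fsync() above, which holds
         * the ILOCK shared across the log force.
         */
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        tp->t_flags |= XFS_TRANS_DIRTY;
        ip->i_itemp->ili_fields |= flags;

        /*
         * Track the same flags separately for fsync; fdatasync() only
         * forces the log when something other than XFS_ILOG_TIMESTAMP
         * is pending in ili_fsync_fields.
         */
        ip->i_itemp->ili_fsync_fields |= flags;
}

With timestamp-only updates pending, fdatasync() now finds nothing but XFS_ILOG_TIMESTAMP in ili_fsync_fields, leaves lsn at zero, skips the log force entirely and falls through to the data device flush handled below.
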
@@ -317,24 +328,33 @@ xfs_file_read_iter(
                return -EIO;
 
        /*
-        * Locking is a bit tricky here. If we take an exclusive lock
-        * for direct IO, we effectively serialise all new concurrent
-        * read IO to this file and block it behind IO that is currently in
-        * progress because IO in progress holds the IO lock shared. We only
-        * need to hold the lock exclusive to blow away the page cache, so
-        * only take lock exclusively if the page cache needs invalidation.
-        * This allows the normal direct IO case of no page cache pages to
-        * proceeed concurrently without serialisation.
+        * Locking is a bit tricky here. If we take an exclusive lock for direct
+        * IO, we effectively serialise all new concurrent read IO to this file
+        * and block it behind IO that is currently in progress because IO in
+        * progress holds the IO lock shared. We only need to hold the lock
+        * exclusive to blow away the page cache, so only take lock exclusively
+        * if the page cache needs invalidation. This allows the normal direct
+        * IO case of no page cache pages to proceed concurrently without
+        * serialisation.
         */
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
        if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
                xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
                xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
+               /*
+                * The generic dio code only flushes the range of the particular
+                * I/O. Because we take an exclusive lock here, this whole
+                * sequence is considerably more expensive for us. This has a
+                * noticeable performance impact for any file with cached pages,
+                * even when outside of the range of the particular I/O.
+                *
+                * Hence, amortize the cost of the lock against a full file
+                * flush and reduce the chances of repeated iolock cycles going
+                * forward.
+                */
                if (inode->i_mapping->nrpages) {
-                       ret = filemap_write_and_wait_range(
-                                                       VFS_I(ip)->i_mapping,
-                                                       pos, pos + size - 1);
+                       ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
                        if (ret) {
                                xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
                                return ret;
@@ -345,9 +365,7 @@ xfs_file_read_iter(
                         * we fail to invalidate a page, but this should never
                         * happen on XFS. Warn if it does fail.
                         */
-                       ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       pos >> PAGE_CACHE_SHIFT,
-                                       (pos + size - 1) >> PAGE_CACHE_SHIFT);
+                       ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
                        WARN_ON_ONCE(ret);
                        ret = 0;
                }
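
Both the direct IO read path here and the write path below now use the same flush-then-invalidate sequence over the whole file. The patch open-codes it in each caller; the helper below is only a hypothetical refactoring that shows the pattern in one place:

/*
 * Illustrative helper only -- this patch open-codes the sequence in both
 * xfs_file_read_iter() and xfs_file_dio_aio_write() rather than factoring
 * it out like this.
 */
STATIC int
example_flush_and_invalidate(
        struct xfs_inode        *ip)
{
        struct address_space    *mapping = VFS_I(ip)->i_mapping;
        int                     ret;

        if (!mapping->nrpages)
                return 0;

        /* Write back and wait on the whole file, not just the I/O range. */
        ret = filemap_write_and_wait(mapping);
        if (ret)
                return ret;

        /*
         * Toss every cached page so the direct I/O sees the on-disk data.
         * Failure here should not happen on XFS; warn and carry on.
         */
        ret = invalidate_inode_pages2(mapping);
        WARN_ON_ONCE(ret);
        return 0;
}

Trading a potentially larger flush for fewer exclusive iolock cycles is the point of the change; for files with no cached pages the nrpages check keeps the common direct IO fast path untouched.
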
@@ -733,19 +751,19 @@ xfs_file_dio_aio_write(
        pos = iocb->ki_pos;
        end = pos + count - 1;
 
+       /*
+        * See xfs_file_read_iter() for why we do a full-file flush here.
+        */
        if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                  pos, end);
+               ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
                if (ret)
                        goto out;
                /*
-                * Invalidate whole pages. This can return an error if
-                * we fail to invalidate a page, but this should never
-                * happen on XFS. Warn if it does fail.
+                * Invalidate whole pages. This can return an error if we fail
+                * to invalidate a page, but this should never happen on XFS.
+                * Warn if it does fail.
                 */
-               ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       pos >> PAGE_CACHE_SHIFT,
-                                       end >> PAGE_CACHE_SHIFT);
+               ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
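
For context, the situation this hunk (and the read-side hunk above) handles is a file that still has cached pages when a direct IO arrives, typically because buffered and direct IO are being mixed on the same file. A userspace sketch of that scenario, with a made-up path and sizes:

#define _GNU_SOURCE             /* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int bfd = open("/mnt/xfs/testfile", O_RDWR | O_CREAT, 0644);
        int dfd = open("/mnt/xfs/testfile", O_RDWR | O_DIRECT);
        void *buf = NULL;

        /* A buffered write populates the page cache for this inode ... */
        write(bfd, "hello", 5);

        /*
         * ... so the O_DIRECT write below enters the nrpages branch above
         * and now flushes and invalidates the whole file rather than just
         * the written range.  O_DIRECT buffers must be suitably aligned.
         */
        posix_memalign(&buf, 4096, 4096);
        memset(buf, 0, 4096);
        pwrite(dfd, buf, 4096, 0);

        free(buf);
        close(dfd);
        close(bfd);
        return 0;
}
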
@@ -1539,8 +1557,36 @@ xfs_filemap_fault(
        return ret;
 }
 
+STATIC int
+xfs_filemap_pmd_fault(
+       struct vm_area_struct   *vma,
+       unsigned long           addr,
+       pmd_t                   *pmd,
+       unsigned int            flags)
+{
+       struct inode            *inode = file_inode(vma->vm_file);
+       struct xfs_inode        *ip = XFS_I(inode);
+       int                     ret;
+
+       if (!IS_DAX(inode))
+               return VM_FAULT_FALLBACK;
+
+       trace_xfs_filemap_pmd_fault(ip);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct,
+                                   xfs_end_io_dax_write);
+       xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       sb_end_pagefault(inode->i_sb);
+
+       return ret;
+}
+
 static const struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = xfs_filemap_fault,
+       .pmd_fault      = xfs_filemap_pmd_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
@@ -1553,7 +1599,7 @@ xfs_file_mmap(
        file_accessed(filp);
        vma->vm_ops = &xfs_file_vm_ops;
        if (IS_DAX(file_inode(filp)))
-               vma->vm_flags |= VM_MIXEDMAP;
+               vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        return 0;
 }
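
The new pmd_fault handler and the VM_HUGEPAGE hint only matter for DAX mappings. Below is a userspace sketch of a mapping that could be satisfied by PMD-sized faults; the mount option, path and the (simplified) alignment handling are assumptions for the example:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#define SZ_2M   (2UL * 1024 * 1024)

int main(void)
{
        int fd = open("/mnt/dax/file", O_RDWR | O_CREAT, 0644); /* fs mounted with -o dax */
        char *p;

        ftruncate(fd, SZ_2M);

        /*
         * With VM_HUGEPAGE set by xfs_file_mmap(), a fault on a 2MiB-aligned,
         * 2MiB-sized region can be handled by xfs_filemap_pmd_fault() as a
         * single PMD mapping instead of 512 individual PTE faults.  A real
         * test would arrange for the mapping to be 2MiB aligned.
         */
        p = mmap(NULL, SZ_2M, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        p[0] = 1;               /* first touch takes the (huge) fault */

        munmap(p, SZ_2M);
        close(fd);
        return 0;
}
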
 