| 1 | /* |
| 2 | * linux/fs/ext4/fsync.c |
| 3 | * |
| 4 | * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) |
| 5 | * from |
| 6 | * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) |
| 7 | * Laboratoire MASI - Institut Blaise Pascal |
| 8 | * Universite Pierre et Marie Curie (Paris VI) |
| 9 | * from |
| 10 | * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds |
| 11 | * |
| 12 | * ext4fs fsync primitive |
| 13 | * |
| 14 | * Big-endian to little-endian byte-swapping/bitmaps by |
| 15 | * David S. Miller (davem@caip.rutgers.edu), 1995 |
| 16 | * |
| 17 | * Removed unnecessary code duplication for little endian machines |
| 18 | * and excessive __inline__s. |
| 19 | * Andi Kleen, 1997 |
| 20 | * |
| 21 | * Major simplifications and cleanup - we only need to do the metadata, because |
| 22 | * we can depend on generic_block_fdatasync() to sync the data blocks. |
| 23 | */ |
| 24 | |
| 25 | #include <linux/time.h> |
| 26 | #include <linux/fs.h> |
| 27 | #include <linux/sched.h> |
| 28 | #include <linux/writeback.h> |
| 29 | #include <linux/blkdev.h> |
| 30 | |
| 31 | #include "ext4.h" |
| 32 | #include "ext4_jbd2.h" |
| 33 | |
| 34 | #include <trace/events/ext4.h> |
| 35 | |
| 36 | /* |
| 37 | * If we're not journaling and this is a just-created file, we have to |
| 38 | * sync our parent directory (if it was freshly created) since |
| 39 | * otherwise it will only be written by writeback, leaving a huge |
| 40 | * window during which a crash may lose the file. This may apply for |
| 41 | * the parent directory's parent as well, and so on recursively, if |
| 42 | * they are also freshly created. |
| 43 | */ |
| 44 | static int ext4_sync_parent(struct inode *inode) |
| 45 | { |
| 46 | struct dentry *dentry = NULL; |
| 47 | struct inode *next; |
| 48 | int ret = 0; |
| 49 | |
| 50 | if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) |
| 51 | return 0; |
| 52 | inode = igrab(inode); |
| 53 | while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { |
| 54 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); |
| 55 | dentry = d_find_any_alias(inode); |
| 56 | if (!dentry) |
| 57 | break; |
| 58 | next = igrab(d_inode(dentry->d_parent)); |
| 59 | dput(dentry); |
| 60 | if (!next) |
| 61 | break; |
| 62 | iput(inode); |
| 63 | inode = next; |
| 64 | /* |
| 65 | * The directory inode may have gone through rmdir by now. But |
| 66 | * the inode itself and its blocks are still allocated (we hold |
| 67 | * a reference to the inode so it didn't go through |
| 68 | * ext4_evict_inode()) and so we are safe to flush metadata |
| 69 | * blocks and the inode. |
| 70 | */ |
| 71 | ret = sync_mapping_buffers(inode->i_mapping); |
| 72 | if (ret) |
| 73 | break; |
| 74 | ret = sync_inode_metadata(inode, 1); |
| 75 | if (ret) |
| 76 | break; |
| 77 | } |
| 78 | iput(inode); |
| 79 | return ret; |
| 80 | } |
| 81 | |
| 82 | /* |
| 83 | * akpm: A new design for ext4_sync_file(). |
| 84 | * |
| 85 | * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). |
| 86 | * There cannot be a transaction open by this task. |
| 87 | * Another task could have dirtied this inode. Its data can be in any |
| 88 | * state in the journalling system. |
| 89 | * |
| 90 | * What we do is just kick off a commit and wait on it. This will snapshot the |
| 91 | * inode to disk. |
| 92 | */ |
| 93 | |
| 94 | int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
| 95 | { |
| 96 | struct inode *inode = file->f_mapping->host; |
| 97 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 98 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
| 99 | int ret = 0, err; |
| 100 | tid_t commit_tid; |
| 101 | bool needs_barrier = false; |
| 102 | |
| 103 | J_ASSERT(ext4_journal_current_handle() == NULL); |
| 104 | |
| 105 | trace_ext4_sync_file_enter(file, datasync); |
| 106 | |
| 107 | if (inode->i_sb->s_flags & MS_RDONLY) { |
| 108 | /* Make sure that we read updated s_mount_flags value */ |
| 109 | smp_rmb(); |
| 110 | if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
| 111 | ret = -EROFS; |
| 112 | goto out; |
| 113 | } |
| 114 | |
| 115 | if (!journal) { |
| 116 | ret = __generic_file_fsync(file, start, end, datasync); |
| 117 | if (!ret) |
| 118 | ret = ext4_sync_parent(inode); |
| 119 | if (test_opt(inode->i_sb, BARRIER)) |
| 120 | goto issue_flush; |
| 121 | goto out; |
| 122 | } |
| 123 | |
| 124 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
| 125 | if (ret) |
| 126 | return ret; |
| 127 | /* |
| 128 | * data=writeback,ordered: |
| 129 | * The caller's filemap_fdatawrite()/wait will sync the data. |
| 130 | * Metadata is in the journal, we wait for proper transaction to |
| 131 | * commit here. |
| 132 | * |
| 133 | * data=journal: |
| 134 | * filemap_fdatawrite won't do anything (the buffers are clean). |
| 135 | * ext4_force_commit will write the file data into the journal and |
| 136 | * will wait on that. |
| 137 | * filemap_fdatawait() will encounter a ton of newly-dirtied pages |
| 138 | * (they were dirtied by commit). But that's OK - the blocks are |
| 139 | * safe in-journal, which is all fsync() needs to ensure. |
| 140 | */ |
| 141 | if (ext4_should_journal_data(inode)) { |
| 142 | ret = ext4_force_commit(inode->i_sb); |
| 143 | goto out; |
| 144 | } |
| 145 | |
| 146 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
| 147 | if (journal->j_flags & JBD2_BARRIER && |
| 148 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
| 149 | needs_barrier = true; |
| 150 | ret = jbd2_complete_transaction(journal, commit_tid); |
| 151 | if (needs_barrier) { |
| 152 | issue_flush: |
| 153 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
| 154 | if (!ret) |
| 155 | ret = err; |
| 156 | } |
| 157 | out: |
| 158 | trace_ext4_sync_file_exit(inode, ret); |
| 159 | return ret; |
| 160 | } |