ext4: fix ext4_writepages() in presence of truncate
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0188e65e1f589efbf1be3726ad1cc1a2236a0739..19fa2e076275952779a443ab1ee5656a11c29a88 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -465,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
        if (es_map->m_lblk != map->m_lblk ||
            es_map->m_flags != map->m_flags ||
            es_map->m_pblk != map->m_pblk) {
-               printk("ES cache assertation failed for inode: %lu "
+               printk("ES cache assertion failed for inode: %lu "
                       "es_cached ex [%d/%d/%llu/%x] != "
                       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
                       inode->i_ino, es_map->m_lblk, es_map->m_len,
@@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                  "logical block %lu\n", inode->i_ino, flags, map->m_len,
                  (unsigned long) map->m_lblk);
 
-       ext4_es_lru_add(inode);
-
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+               ext4_es_lru_add(inode);
                if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
                        map->m_pblk = ext4_es_pblock(&es) +
                                        map->m_lblk - es.es_lblk;
@@ -554,16 +553,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        }
        if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
-#ifdef ES_AGGRESSIVE_TEST
-               if (retval != map->m_len) {
-                       printk("ES len assertation failed for inode: %lu "
-                              "retval %d != map->m_len %d "
-                              "in %s (lookup)\n", inode->i_ino, retval,
-                              map->m_len, __func__);
+               if (unlikely(retval != map->m_len)) {
+                       ext4_warning(inode->i_sb,
+                                    "ES len assertion failed for inode "
+                                    "%lu: retval %d != map->m_len %d",
+                                    inode->i_ino, retval, map->m_len);
+                       WARN_ON(1);
                }
-#endif
 
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -655,16 +653,15 @@ found:
 
        if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
-#ifdef ES_AGGRESSIVE_TEST
-               if (retval != map->m_len) {
-                       printk("ES len assertation failed for inode: %lu "
-                              "retval %d != map->m_len %d "
-                              "in %s (allocation)\n", inode->i_ino, retval,
-                              map->m_len, __func__);
+               if (unlikely(retval != map->m_len)) {
+                       ext4_warning(inode->i_sb,
+                                    "ES len assertion failed for inode "
+                                    "%lu: retval %d != map->m_len %d",
+                                    inode->i_ino, retval, map->m_len);
+                       WARN_ON(1);
                }
-#endif
 
                /*
                 * If the extent has been zeroed out, we don't need to update
@@ -1529,11 +1526,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                  "logical block %lu\n", inode->i_ino, map->m_len,
                  (unsigned long) map->m_lblk);
 
-       ext4_es_lru_add(inode);
-
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, iblock, &es)) {
-
+               ext4_es_lru_add(inode);
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
                        down_read((&EXT4_I(inode)->i_data_sem));
@@ -1638,16 +1633,15 @@ add_delayed:
                set_buffer_delay(bh);
        } else if (retval > 0) {
                int ret;
-               unsigned long long status;
+               unsigned int status;
 
-#ifdef ES_AGGRESSIVE_TEST
-               if (retval != map->m_len) {
-                       printk("ES len assertation failed for inode: %lu "
-                              "retval %d != map->m_len %d "
-                              "in %s (lookup)\n", inode->i_ino, retval,
-                              map->m_len, __func__);
+               if (unlikely(retval != map->m_len)) {
+                       ext4_warning(inode->i_sb,
+                                    "ES len assertion failed for inode "
+                                    "%lu: retval %d != map->m_len %d",
+                                    inode->i_ino, retval, map->m_len);
+                       WARN_ON(1);
                }
-#endif
 
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -1896,6 +1890,26 @@ static int ext4_writepage(struct page *page,
        return ret;
 }
 
+static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
+{
+       int len;
+       loff_t size = i_size_read(mpd->inode);
+       int err;
+
+       BUG_ON(page->index != mpd->first_page);
+       if (page->index == size >> PAGE_CACHE_SHIFT)
+               len = size & ~PAGE_CACHE_MASK;
+       else
+               len = PAGE_CACHE_SIZE;
+       clear_page_dirty_for_io(page);
+       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+       if (!err)
+               mpd->wbc->nr_to_write--;
+       mpd->first_page++;
+
+       return err;
+}
+
 #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
 
 /*
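
The mpage_submit_page() helper added above submits a full page for writeback
except on the page containing EOF, where only the bytes up to i_size are
written. A minimal userspace sketch of that length calculation, assuming a
4096-byte page; the constants and names below are illustrative, not the
kernel's PAGE_CACHE_* definitions:

#include <stdio.h>

#define PG_SIZE  4096UL			/* assumed page size */
#define PG_SHIFT 12

/* Bytes to write back for the page at 'index' of a file of size 'size'. */
static unsigned long writeback_len(unsigned long index, unsigned long long size)
{
	if (index == size >> PG_SHIFT)		/* page containing EOF */
		return size & (PG_SIZE - 1);	/* partial tail only */
	return PG_SIZE;				/* page fully inside the file */
}

int main(void)
{
	/* 10000-byte file: pages 0 and 1 are full, page 2 holds 1808 bytes. */
	printf("%lu %lu %lu\n", writeback_len(0, 10000),
	       writeback_len(1, 10000), writeback_len(2, 10000));
	return 0;
}
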
@@ -1910,82 +1924,94 @@ static int ext4_writepage(struct page *page,
  *
  * @mpd - extent of blocks
  * @lblk - logical number of the block in the file
- * @b_state - b_state of the buffer head added
+ * @bh - buffer head we want to add to the extent
  *
- * the function is used to collect contig. blocks in same state
+ * The function is used to collect contig. blocks in the same state. If the
+ * buffer doesn't require mapping for writeback and we haven't started the
+ * extent of buffers to map yet, the function returns 'true' immediately - the
+ * caller can write the buffer right away. Otherwise the function returns true
+ * if the block has been added to the extent, false if the block couldn't be
+ * added.
  */
-static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
-                                 unsigned long b_state)
+static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
+                                  struct buffer_head *bh)
 {
        struct ext4_map_blocks *map = &mpd->map;
 
-       /* Don't go larger than mballoc is willing to allocate */
-       if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
-               return 0;
+       /* Buffer that doesn't need mapping for writeback? */
+       if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
+           (!buffer_delay(bh) && !buffer_unwritten(bh))) {
+               /* So far no extent to map => we write the buffer right away */
+               if (map->m_len == 0)
+                       return true;
+               return false;
+       }
 
        /* First block in the extent? */
        if (map->m_len == 0) {
                map->m_lblk = lblk;
                map->m_len = 1;
-               map->m_flags = b_state & BH_FLAGS;
-               return 1;
+               map->m_flags = bh->b_state & BH_FLAGS;
+               return true;
        }
 
+       /* Don't go larger than mballoc is willing to allocate */
+       if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
+               return false;
+
        /* Can we merge the block to our big extent? */
        if (lblk == map->m_lblk + map->m_len &&
-           (b_state & BH_FLAGS) == map->m_flags) {
+           (bh->b_state & BH_FLAGS) == map->m_flags) {
                map->m_len++;
-               return 1;
+               return true;
        }
-       return 0;
+       return false;
 }
 
-static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
-                                   struct buffer_head *head,
-                                   struct buffer_head *bh,
-                                   ext4_lblk_t lblk)
+/*
+ * mpage_process_page_bufs - submit page buffers for IO or add them to extent
+ *
+ * @mpd - extent of blocks for mapping
+ * @head - the first buffer in the page
+ * @bh - buffer we should start processing from
+ * @lblk - logical number of the block in the file corresponding to @bh
+ *
+ * Walk through page buffers from @bh upto @head (exclusive) and either submit
+ * the page for IO if all buffers in this page were mapped and there's no
+ * accumulated extent of buffers to map or add buffers in the page to the
+ * extent of buffers to map. The function returns 1 if the caller can continue
+ * by processing the next page, 0 if it should stop adding buffers to the
+ * extent to map because we cannot extend it anymore. It can also return value
+ * < 0 in case of error during IO submission.
+ */
+static int mpage_process_page_bufs(struct mpage_da_data *mpd,
+                                  struct buffer_head *head,
+                                  struct buffer_head *bh,
+                                  ext4_lblk_t lblk)
 {
        struct inode *inode = mpd->inode;
+       int err;
        ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
                                                        >> inode->i_blkbits;
 
        do {
                BUG_ON(buffer_locked(bh));
 
-               if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
-                   (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
-                   lblk >= blocks) {
+               if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
                        /* Found extent to map? */
                        if (mpd->map.m_len)
-                               return false;
-                       if (lblk >= blocks)
-                               return true;
-                       continue;
+                               return 0;
+                       /* Everything mapped so far and we hit EOF */
+                       break;
                }
-               if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
-                       return false;
        } while (lblk++, (bh = bh->b_this_page) != head);
-       return true;
-}
-
-static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
-{
-       int len;
-       loff_t size = i_size_read(mpd->inode);
-       int err;
-
-       BUG_ON(page->index != mpd->first_page);
-       if (page->index == size >> PAGE_CACHE_SHIFT)
-               len = size & ~PAGE_CACHE_MASK;
-       else
-               len = PAGE_CACHE_SIZE;
-       clear_page_dirty_for_io(page);
-       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
-       if (!err)
-               mpd->wbc->nr_to_write--;
-       mpd->first_page++;
-
-       return err;
+       /* So far everything mapped? Submit the page for IO. */
+       if (mpd->map.m_len == 0) {
+               err = mpage_submit_page(mpd, head->b_page);
+               if (err < 0)
+                       return err;
+       }
+       return lblk < blocks;
 }
 
 /*
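
mpage_add_bh_to_extent() above merges a block into the extent being
accumulated only when the block is contiguous with it and carries the same
delayed/unwritten state, and it stops growing the extent once the allocator
limit is hit. A minimal standalone sketch of that merge rule; the struct,
flag bits and limit below are illustrative stand-ins for struct
ext4_map_blocks, the BH_FLAGS bits and MAX_WRITEPAGES_EXTENT_LEN:

#include <stdbool.h>
#include <stdio.h>

#define F_DELAY		0x1		/* stand-in for BH_Delay */
#define F_UNWRITTEN	0x2		/* stand-in for BH_Unwritten */
#define MAX_EXTENT_LEN	2048		/* stand-in for MAX_WRITEPAGES_EXTENT_LEN */

struct map {				/* stand-in for struct ext4_map_blocks */
	unsigned int lblk;
	unsigned int len;
	unsigned int flags;
};

/* Try to add block 'lblk' with buffer state 'flags' to the extent. */
static bool add_block(struct map *map, unsigned int lblk, unsigned int flags)
{
	if (map->len == 0) {			/* first block starts the extent */
		map->lblk = lblk;
		map->len = 1;
		map->flags = flags;
		return true;
	}
	if (map->len >= MAX_EXTENT_LEN)		/* don't exceed allocator limit */
		return false;
	if (lblk == map->lblk + map->len && flags == map->flags) {
		map->len++;			/* contiguous, same state: merge */
		return true;
	}
	return false;				/* discontiguous or different state */
}

int main(void)
{
	struct map m = { 0 };

	printf("%d\n", add_block(&m, 10, F_DELAY));	/* 1: starts extent at 10 */
	printf("%d\n", add_block(&m, 11, F_DELAY));	/* 1: merges, len becomes 2 */
	printf("%d\n", add_block(&m, 13, F_DELAY));	/* 0: not contiguous */
	return 0;
}
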
@@ -2009,8 +2035,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
        struct inode *inode = mpd->inode;
        struct buffer_head *head, *bh;
        int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
-       ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
-                                                       >> inode->i_blkbits;
        pgoff_t start, end;
        ext4_lblk_t lblk;
        sector_t pblock;
@@ -2045,18 +2069,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
                                         */
                                        mpd->map.m_len = 0;
                                        mpd->map.m_flags = 0;
-                                       add_page_bufs_to_extent(mpd, head, bh,
-                                                               lblk);
+                                       /*
+                                        * FIXME: If dioread_nolock supports
+                                        * blocksize < pagesize, we need to make
+                                        * sure we add size mapped so far to
+                                        * io_end->size as the following call
+                                        * can submit the page for IO.
+                                        */
+                                       err = mpage_process_page_bufs(mpd, head,
+                                                                     bh, lblk);
                                        pagevec_release(&pvec);
-                                       return 0;
+                                       if (err > 0)
+                                               err = 0;
+                                       return err;
                                }
                                if (buffer_delay(bh)) {
                                        clear_buffer_delay(bh);
                                        bh->b_blocknr = pblock++;
                                }
                                clear_buffer_unwritten(bh);
-                       } while (++lblk < blocks &&
-                                (bh = bh->b_this_page) != head);
+                       } while (lblk++, (bh = bh->b_this_page) != head);
 
                        /*
                         * FIXME: This is going to break if dioread_nolock
@@ -2163,7 +2195,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 
        mpd->io_submit.io_end->offset =
                                ((loff_t)map->m_lblk) << inode->i_blkbits;
-       while (map->m_len) {
+       do {
                err = mpage_map_one_extent(handle, mpd);
                if (err < 0) {
                        struct super_block *sb = inode->i_sb;
@@ -2201,7 +2233,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                err = mpage_map_and_submit_buffers(mpd);
                if (err < 0)
                        return err;
-       }
+       } while (map->m_len);
 
        /* Update on-disk size after IO is submitted */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
@@ -2325,14 +2357,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
                        lblk = ((ext4_lblk_t)page->index) <<
                                (PAGE_CACHE_SHIFT - blkbits);
                        head = page_buffers(page);
-                       if (!add_page_bufs_to_extent(mpd, head, head, lblk))
+                       err = mpage_process_page_bufs(mpd, head, head, lblk);
+                       if (err <= 0)
                                goto out;
-                       /* So far everything mapped? Submit the page for IO. */
-                       if (mpd->map.m_len == 0) {
-                               err = mpage_submit_page(mpd, page);
-                               if (err < 0)
-                                       goto out;
-                       }
+                       err = 0;
 
                        /*
                         * Accumulated enough dirty pages? This doesn't apply
@@ -3539,6 +3567,18 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                   offset;
        }
 
+       if (offset & (sb->s_blocksize - 1) ||
+           (offset + length) & (sb->s_blocksize - 1)) {
+               /*
+                * Attach jinode to inode for jbd2 if we do any zeroing of
+                * partial block
+                */
+               ret = ext4_inode_attach_jinode(inode);
+               if (ret < 0)
+                       goto out_mutex;
+
+       }
+
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
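
The check added to ext4_punch_hole() above attaches a jinode only when the
hole is not block aligned, since only then do partial blocks have to be
zeroed out; the fully covered interior is then computed with round_up() /
round_down(). A small sketch of that arithmetic for an assumed 4096-byte
block size (using % where the kernel masks with s_blocksize - 1):

#include <stdio.h>

#define BLKSIZE 4096ULL			/* assumed block size */

#define ROUND_UP(x, a)   ((((x) + (a) - 1) / (a)) * (a))
#define ROUND_DOWN(x, a) (((x) / (a)) * (a))

int main(void)
{
	unsigned long long offset = 1000, length = 10000;

	/* Partial-block zeroing is needed if either end is not block aligned. */
	int needs_zeroing = (offset % BLKSIZE) || ((offset + length) % BLKSIZE);

	/* Range of whole blocks that can simply be removed from the file. */
	unsigned long long first = ROUND_UP(offset, BLKSIZE);
	unsigned long long last = ROUND_DOWN(offset + length, BLKSIZE) - 1;

	printf("needs_zeroing=%d first_block_offset=%llu last_block_offset=%llu\n",
	       needs_zeroing, first, last);
	return 0;
}
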
 
@@ -3607,6 +3647,31 @@ out_mutex:
        return ret;
 }
 
+int ext4_inode_attach_jinode(struct inode *inode)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct jbd2_inode *jinode;
+
+       if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
+               return 0;
+
+       jinode = jbd2_alloc_inode(GFP_KERNEL);
+       spin_lock(&inode->i_lock);
+       if (!ei->jinode) {
+               if (!jinode) {
+                       spin_unlock(&inode->i_lock);
+                       return -ENOMEM;
+               }
+               ei->jinode = jinode;
+               jbd2_journal_init_jbd_inode(ei->jinode, inode);
+               jinode = NULL;
+       }
+       spin_unlock(&inode->i_lock);
+       if (unlikely(jinode != NULL))
+               jbd2_free_inode(jinode);
+       return 0;
+}
+
 /*
  * ext4_truncate()
  *
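
ext4_inode_attach_jinode() above follows the usual lazy-attach pattern:
allocate before taking i_lock (the allocation may sleep), install the object
only if nobody raced in, and free the unused copy after dropping the lock. A
userspace sketch of the same pattern, with a pthread mutex standing in for
i_lock (all names below are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct jnode { int dummy; };		/* stand-in for struct jbd2_inode */

static struct jnode *attached;		/* plays the role of ei->jinode */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int attach_jnode(void)
{
	struct jnode *jnode;

	if (attached)				/* fast path: already attached */
		return 0;

	jnode = malloc(sizeof(*jnode));		/* may sleep, so done unlocked */
	pthread_mutex_lock(&lock);
	if (!attached) {
		if (!jnode) {
			pthread_mutex_unlock(&lock);
			return -1;		/* allocation failed */
		}
		attached = jnode;		/* we won the race */
		jnode = NULL;
	}
	pthread_mutex_unlock(&lock);
	free(jnode);				/* no-op for the winner */
	return 0;
}

int main(void)
{
	printf("ret=%d attached=%p\n", attach_jnode(), (void *)attached);
	return 0;
}
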
@@ -3667,6 +3732,12 @@ void ext4_truncate(struct inode *inode)
                        return;
        }
 
+       /* If we zero-out tail of the page, we have to create jinode for jbd2 */
+       if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
+               if (ext4_inode_attach_jinode(inode) < 0)
+                       return;
+       }
+
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
        else