jbd2: remove journal_head from descriptor buffers
[deliverable/linux.git] / fs / jbd2 / commit.c
CommitLineData
/*
 * linux/fs/jbd2/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */
15
16#include <linux/time.h>
17#include <linux/fs.h>
f7f4bccb 18#include <linux/jbd2.h>
470decc6
DK
19#include <linux/errno.h>
20#include <linux/slab.h>
21#include <linux/mm.h>
22#include <linux/pagemap.h>
8e85fb3f 23#include <linux/jiffies.h>
818d276c 24#include <linux/crc32.h>
cd1aac32
AK
25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
fd98496f 27#include <linux/bio.h>
0e3d2a63 28#include <linux/blkdev.h>
39e3ac25 29#include <linux/bitops.h>
879c5e6b 30#include <trace/events/jbd2.h>
470decc6
DK
31
32/*
33 * Default IO end handler for temporary BJ_IO buffer_heads.
34 */
35static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
36{
37 BUFFER_TRACE(bh, "");
38 if (uptodate)
39 set_buffer_uptodate(bh);
40 else
41 clear_buffer_uptodate(bh);
42 unlock_buffer(bh);
43}
44
45/*
87c89c23
JK
46 * When an ext4 file is truncated, it is possible that some pages are not
47 * successfully freed, because they are attached to a committing transaction.
470decc6
DK
48 * After the transaction commits, these pages are left on the LRU, with no
49 * ->mapping, and with attached buffers. These pages are trivially reclaimable
50 * by the VM, but their apparent absence upsets the VM accounting, and it makes
51 * the numbers in /proc/meminfo look odd.
52 *
53 * So here, we have a buffer which has just come off the forget list. Look to
54 * see if we can strip all buffers from the backing page.
55 *
56 * Called under lock_journal(), and possibly under journal_datalist_lock. The
57 * caller provided us with a ref against the buffer, and we drop that here.
58 */
59static void release_buffer_page(struct buffer_head *bh)
60{
61 struct page *page;
62
63 if (buffer_dirty(bh))
64 goto nope;
65 if (atomic_read(&bh->b_count) != 1)
66 goto nope;
67 page = bh->b_page;
68 if (!page)
69 goto nope;
70 if (page->mapping)
71 goto nope;
72
73 /* OK, it's a truncated page */
529ae9aa 74 if (!trylock_page(page))
470decc6
DK
75 goto nope;
76
77 page_cache_get(page);
78 __brelse(bh);
79 try_to_free_buffers(page);
80 unlock_page(page);
81 page_cache_release(page);
82 return;
83
84nope:
85 __brelse(bh);
86}
87
e5a120ae 88static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
1f56c589
DW
89{
90 struct commit_header *h;
91 __u32 csum;
92
93 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
94 return;
95
e5a120ae 96 h = (struct commit_header *)(bh->b_data);
1f56c589
DW
97 h->h_chksum_type = 0;
98 h->h_chksum_size = 0;
99 h->h_chksum[0] = 0;
e5a120ae 100 csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
1f56c589
DW
101 h->h_chksum[0] = cpu_to_be32(csum);
102}
103
818d276c
GS
104/*
105 * Done it all: now submit the commit record. We should have
470decc6
DK
106 * cleaned up our previous buffers by now, so if we are in abort
107 * mode we can now just skip the rest of the journal write
108 * entirely.
109 *
110 * Returns 1 if the journal needs to be aborted or 0 on success
111 */
818d276c
GS
112static int journal_submit_commit_record(journal_t *journal,
113 transaction_t *commit_transaction,
114 struct buffer_head **cbh,
115 __u32 crc32_sum)
470decc6 116{
818d276c 117 struct commit_header *tmp;
470decc6 118 struct buffer_head *bh;
818d276c 119 int ret;
736603ab 120 struct timespec now = current_kernel_time();
470decc6 121
6cba611e
ZH
122 *cbh = NULL;
123
470decc6
DK
124 if (is_journal_aborted(journal))
125 return 0;
126
e5a120ae
JK
127 bh = jbd2_journal_get_descriptor_buffer(journal);
128 if (!bh)
470decc6
DK
129 return 1;
130
818d276c
GS
131 tmp = (struct commit_header *)bh->b_data;
132 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
133 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
134 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
736603ab
TT
135 tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
136 tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
818d276c
GS
137
138 if (JBD2_HAS_COMPAT_FEATURE(journal,
139 JBD2_FEATURE_COMPAT_CHECKSUM)) {
140 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
141 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
142 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
470decc6 143 }
e5a120ae 144 jbd2_commit_block_csum_set(journal, bh);
470decc6 145
e5a120ae 146 BUFFER_TRACE(bh, "submit commit block");
818d276c 147 lock_buffer(bh);
45a90bfd 148 clear_buffer_dirty(bh);
818d276c
GS
149 set_buffer_uptodate(bh);
150 bh->b_end_io = journal_end_buffer_io_sync;
151
152 if (journal->j_flags & JBD2_BARRIER &&
0e3d2a63 153 !JBD2_HAS_INCOMPAT_FEATURE(journal,
9c35575b 154 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
721a9602 155 ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
9c35575b 156 else
721a9602 157 ret = submit_bh(WRITE_SYNC, bh);
9c35575b 158
818d276c
GS
159 *cbh = bh;
160 return ret;
161}
162
163/*
164 * This function along with journal_submit_commit_record
165 * allows to write the commit record asynchronously.
166 */
fd98496f
TT
167static int journal_wait_on_commit_record(journal_t *journal,
168 struct buffer_head *bh)
818d276c
GS
169{
170 int ret = 0;
171
172 clear_buffer_dirty(bh);
173 wait_on_buffer(bh);
470decc6 174
818d276c
GS
175 if (unlikely(!buffer_uptodate(bh)))
176 ret = -EIO;
177 put_bh(bh); /* One for getblk() */
818d276c
GS
178
179 return ret;
470decc6
DK
180}
181
cd1aac32
AK
182/*
183 * write the filemap data using writepage() address_space_operations.
184 * We don't do block allocation here even for delalloc. We don't
185 * use writepages() because with dealyed allocation we may be doing
186 * block allocation in writepages().
187 */
188static int journal_submit_inode_data_buffers(struct address_space *mapping)
189{
190 int ret;
191 struct writeback_control wbc = {
192 .sync_mode = WB_SYNC_ALL,
193 .nr_to_write = mapping->nrpages * 2,
194 .range_start = 0,
195 .range_end = i_size_read(mapping->host),
cd1aac32
AK
196 };
197
198 ret = generic_writepages(mapping, &wbc);
199 return ret;
200}
201
c851ed54
JK
202/*
203 * Submit all the data buffers of inode associated with the transaction to
204 * disk.
205 *
206 * We are in a committing transaction. Therefore no new inode can be added to
207 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
208 * operate on from being released while we write out pages.
209 */
cd1aac32 210static int journal_submit_data_buffers(journal_t *journal,
c851ed54
JK
211 transaction_t *commit_transaction)
212{
213 struct jbd2_inode *jinode;
214 int err, ret = 0;
215 struct address_space *mapping;
216
217 spin_lock(&journal->j_list_lock);
218 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
219 mapping = jinode->i_vfs_inode->i_mapping;
39e3ac25 220 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
c851ed54 221 spin_unlock(&journal->j_list_lock);
cd1aac32
AK
222 /*
223 * submit the inode data buffers. We use writepage
224 * instead of writepages. Because writepages can do
225 * block allocation with delalloc. We need to write
226 * only allocated blocks here.
227 */
879c5e6b 228 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
cd1aac32 229 err = journal_submit_inode_data_buffers(mapping);
c851ed54
JK
230 if (!ret)
231 ret = err;
232 spin_lock(&journal->j_list_lock);
233 J_ASSERT(jinode->i_transaction == commit_transaction);
39e3ac25
BK
234 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
235 smp_mb__after_clear_bit();
c851ed54
JK
236 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
237 }
238 spin_unlock(&journal->j_list_lock);
239 return ret;
240}
241
242/*
243 * Wait for data submitted for writeout, refile inodes to proper
244 * transaction if needed.
245 *
246 */
247static int journal_finish_inode_data_buffers(journal_t *journal,
248 transaction_t *commit_transaction)
249{
250 struct jbd2_inode *jinode, *next_i;
251 int err, ret = 0;
252
cd1aac32 253 /* For locking, see the comment in journal_submit_data_buffers() */
c851ed54
JK
254 spin_lock(&journal->j_list_lock);
255 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
39e3ac25 256 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
c851ed54
JK
257 spin_unlock(&journal->j_list_lock);
258 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
e9e34f4e
HK
259 if (err) {
260 /*
261 * Because AS_EIO is cleared by
94004ed7 262 * filemap_fdatawait_range(), set it again so
e9e34f4e
HK
263 * that user process can get -EIO from fsync().
264 */
265 set_bit(AS_EIO,
266 &jinode->i_vfs_inode->i_mapping->flags);
267
268 if (!ret)
269 ret = err;
270 }
c851ed54 271 spin_lock(&journal->j_list_lock);
39e3ac25
BK
272 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
273 smp_mb__after_clear_bit();
c851ed54
JK
274 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
275 }
276
277 /* Now refile inode to proper lists */
278 list_for_each_entry_safe(jinode, next_i,
279 &commit_transaction->t_inode_list, i_list) {
280 list_del(&jinode->i_list);
281 if (jinode->i_next_transaction) {
282 jinode->i_transaction = jinode->i_next_transaction;
283 jinode->i_next_transaction = NULL;
284 list_add(&jinode->i_list,
285 &jinode->i_transaction->t_inode_list);
286 } else {
287 jinode->i_transaction = NULL;
288 }
289 }
290 spin_unlock(&journal->j_list_lock);
291
292 return ret;
293}
294
818d276c
GS
295static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
296{
297 struct page *page = bh->b_page;
298 char *addr;
299 __u32 checksum;
300
303a8f2a 301 addr = kmap_atomic(page);
818d276c
GS
302 checksum = crc32_be(crc32_sum,
303 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
303a8f2a 304 kunmap_atomic(addr);
818d276c
GS
305
306 return checksum;
307}
308
309static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
18eba7aa 310 unsigned long long block)
b517bea1
ZB
311{
312 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
cd02ff0b 313 if (tag_bytes > JBD2_TAG_SIZE32)
b517bea1
ZB
314 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
315}
316
3caa487f 317static void jbd2_descr_block_csum_set(journal_t *j,
e5a120ae 318 struct buffer_head *bh)
3caa487f
DW
319{
320 struct jbd2_journal_block_tail *tail;
321 __u32 csum;
322
323 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
324 return;
325
e5a120ae 326 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
3caa487f
DW
327 sizeof(struct jbd2_journal_block_tail));
328 tail->t_checksum = 0;
e5a120ae 329 csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
3caa487f
DW
330 tail->t_checksum = cpu_to_be32(csum);
331}
332
c3900875
DW
333static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
334 struct buffer_head *bh, __u32 sequence)
335{
336 struct page *page = bh->b_page;
337 __u8 *addr;
eee06c56 338 __u32 csum32;
c3900875
DW
339
340 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
341 return;
342
343 sequence = cpu_to_be32(sequence);
906adea1 344 addr = kmap_atomic(page);
eee06c56
DW
345 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
346 sizeof(sequence));
347 csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
348 bh->b_size);
906adea1 349 kunmap_atomic(addr);
c3900875 350
eee06c56
DW
351 /* We only have space to store the lower 16 bits of the crc32c. */
352 tag->t_checksum = cpu_to_be16(csum32);
c3900875 353}
470decc6 354/*
f7f4bccb 355 * jbd2_journal_commit_transaction
470decc6
DK
356 *
357 * The primary function for committing a transaction to the log. This
358 * function is called by the journal thread to begin a complete commit.
359 */
f7f4bccb 360void jbd2_journal_commit_transaction(journal_t *journal)
470decc6 361{
8e85fb3f 362 struct transaction_stats_s stats;
470decc6 363 transaction_t *commit_transaction;
e5a120ae
JK
364 struct journal_head *jh;
365 struct buffer_head *descriptor;
470decc6
DK
366 struct buffer_head **wbuf = journal->j_wbuf;
367 int bufs;
368 int flags;
369 int err;
18eba7aa 370 unsigned long long blocknr;
e07f7183
JB
371 ktime_t start_time;
372 u64 commit_time;
470decc6
DK
373 char *tagp = NULL;
374 journal_header_t *header;
375 journal_block_tag_t *tag = NULL;
376 int space_left = 0;
377 int first_tag = 0;
378 int tag_flag;
794446c6 379 int i;
b517bea1 380 int tag_bytes = journal_tag_bytes(journal);
818d276c
GS
381 struct buffer_head *cbh = NULL; /* For transactional checksums */
382 __u32 crc32_sum = ~0;
82f04ab4 383 struct blk_plug plug;
3339578f
JK
384 /* Tail of the journal */
385 unsigned long first_block;
386 tid_t first_tid;
387 int update_tail;
3caa487f 388 int csum_size = 0;
f5113eff 389 LIST_HEAD(io_bufs);
e5a120ae 390 LIST_HEAD(log_bufs);
3caa487f
DW
391
392 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
393 csum_size = sizeof(struct jbd2_journal_block_tail);
470decc6
DK
394
395 /*
396 * First job: lock down the current transaction and wait for
397 * all outstanding updates to complete.
398 */
399
f7f4bccb
MC
400 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
401 if (journal->j_flags & JBD2_FLUSHED) {
470decc6 402 jbd_debug(3, "super block updated\n");
a78bb11d 403 mutex_lock(&journal->j_checkpoint_mutex);
79feb521
JK
404 /*
405 * We hold j_checkpoint_mutex so tail cannot change under us.
406 * We don't need any special data guarantees for writing sb
407 * since journal is empty and it is ok for write to be
408 * flushed only with transaction commit.
409 */
410 jbd2_journal_update_sb_log_tail(journal,
411 journal->j_tail_sequence,
412 journal->j_tail,
413 WRITE_SYNC);
a78bb11d 414 mutex_unlock(&journal->j_checkpoint_mutex);
470decc6
DK
415 } else {
416 jbd_debug(3, "superblock not updated\n");
417 }
418
419 J_ASSERT(journal->j_running_transaction != NULL);
420 J_ASSERT(journal->j_committing_transaction == NULL);
421
422 commit_transaction = journal->j_running_transaction;
423 J_ASSERT(commit_transaction->t_state == T_RUNNING);
424
879c5e6b 425 trace_jbd2_start_commit(journal, commit_transaction);
f2a44523 426 jbd_debug(1, "JBD2: starting commit of transaction %d\n",
470decc6
DK
427 commit_transaction->t_tid);
428
a931da6a 429 write_lock(&journal->j_state_lock);
470decc6
DK
430 commit_transaction->t_state = T_LOCKED;
431
879c5e6b 432 trace_jbd2_commit_locking(journal, commit_transaction);
bf699327 433 stats.run.rs_wait = commit_transaction->t_max_wait;
9fff24aa 434 stats.run.rs_request_delay = 0;
bf699327 435 stats.run.rs_locked = jiffies;
9fff24aa
TT
436 if (commit_transaction->t_requested)
437 stats.run.rs_request_delay =
438 jbd2_time_diff(commit_transaction->t_requested,
439 stats.run.rs_locked);
bf699327
TT
440 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
441 stats.run.rs_locked);
8e85fb3f 442
470decc6 443 spin_lock(&commit_transaction->t_handle_lock);
a51dca9c 444 while (atomic_read(&commit_transaction->t_updates)) {
470decc6
DK
445 DEFINE_WAIT(wait);
446
447 prepare_to_wait(&journal->j_wait_updates, &wait,
448 TASK_UNINTERRUPTIBLE);
a51dca9c 449 if (atomic_read(&commit_transaction->t_updates)) {
470decc6 450 spin_unlock(&commit_transaction->t_handle_lock);
a931da6a 451 write_unlock(&journal->j_state_lock);
470decc6 452 schedule();
a931da6a 453 write_lock(&journal->j_state_lock);
470decc6
DK
454 spin_lock(&commit_transaction->t_handle_lock);
455 }
456 finish_wait(&journal->j_wait_updates, &wait);
457 }
458 spin_unlock(&commit_transaction->t_handle_lock);
459
a51dca9c 460 J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
470decc6
DK
461 journal->j_max_transaction_buffers);
462
463 /*
464 * First thing we are allowed to do is to discard any remaining
465 * BJ_Reserved buffers. Note, it is _not_ permissible to assume
466 * that there are no such buffers: if a large filesystem
467 * operation like a truncate needs to split itself over multiple
f7f4bccb 468 * transactions, then it may try to do a jbd2_journal_restart() while
470decc6
DK
469 * there are still BJ_Reserved buffers outstanding. These must
470 * be released cleanly from the current transaction.
471 *
472 * In this case, the filesystem must still reserve write access
473 * again before modifying the buffer in the new transaction, but
474 * we do not require it to remember exactly which old buffers it
475 * has reserved. This is consistent with the existing behaviour
f7f4bccb 476 * that multiple jbd2_journal_get_write_access() calls to the same
25985edc 477 * buffer are perfectly permissible.
470decc6
DK
478 */
479 while (commit_transaction->t_reserved_list) {
480 jh = commit_transaction->t_reserved_list;
481 JBUFFER_TRACE(jh, "reserved, unused: refile");
482 /*
f7f4bccb 483 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
470decc6
DK
484 * leave undo-committed data.
485 */
486 if (jh->b_committed_data) {
487 struct buffer_head *bh = jh2bh(jh);
488
489 jbd_lock_bh_state(bh);
af1e76d6 490 jbd2_free(jh->b_committed_data, bh->b_size);
470decc6
DK
491 jh->b_committed_data = NULL;
492 jbd_unlock_bh_state(bh);
493 }
f7f4bccb 494 jbd2_journal_refile_buffer(journal, jh);
470decc6
DK
495 }
496
497 /*
498 * Now try to drop any written-back buffers from the journal's
499 * checkpoint lists. We do this *before* commit because it potentially
500 * frees some memory
501 */
502 spin_lock(&journal->j_list_lock);
f7f4bccb 503 __jbd2_journal_clean_checkpoint_list(journal);
470decc6
DK
504 spin_unlock(&journal->j_list_lock);
505
f2a44523 506 jbd_debug(3, "JBD2: commit phase 1\n");
470decc6 507
1ba37268
YY
508 /*
509 * Clear revoked flag to reflect there is no revoked buffers
510 * in the next transaction which is going to be started.
511 */
512 jbd2_clear_buffer_revoked_flags(journal);
513
470decc6
DK
514 /*
515 * Switch to a new revoke table.
516 */
f7f4bccb 517 jbd2_journal_switch_revoke_table(journal);
470decc6 518
879c5e6b 519 trace_jbd2_commit_flushing(journal, commit_transaction);
bf699327
TT
520 stats.run.rs_flushing = jiffies;
521 stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
522 stats.run.rs_flushing);
8e85fb3f 523
470decc6
DK
524 commit_transaction->t_state = T_FLUSH;
525 journal->j_committing_transaction = commit_transaction;
526 journal->j_running_transaction = NULL;
e07f7183 527 start_time = ktime_get();
470decc6
DK
528 commit_transaction->t_log_start = journal->j_head;
529 wake_up(&journal->j_wait_transaction_locked);
a931da6a 530 write_unlock(&journal->j_state_lock);
470decc6 531
f2a44523 532 jbd_debug(3, "JBD2: commit phase 2\n");
470decc6 533
470decc6
DK
534 /*
535 * Now start flushing things to disk, in the order they appear
536 * on the transaction lists. Data blocks go first.
537 */
cd1aac32 538 err = journal_submit_data_buffers(journal, commit_transaction);
470decc6 539 if (err)
a7fa2baf 540 jbd2_journal_abort(journal, err);
470decc6 541
82f04ab4 542 blk_start_plug(&plug);
67c457a8 543 jbd2_journal_write_revoke_records(journal, commit_transaction,
e5a120ae 544 &log_bufs, WRITE_SYNC);
82f04ab4 545 blk_finish_plug(&plug);
470decc6 546
f2a44523 547 jbd_debug(3, "JBD2: commit phase 2\n");
470decc6 548
470decc6
DK
549 /*
550 * Way to go: we have now written out all of the data for a
551 * transaction! Now comes the tricky part: we need to write out
552 * metadata. Loop over the transaction's entire buffer list:
553 */
a931da6a 554 write_lock(&journal->j_state_lock);
470decc6 555 commit_transaction->t_state = T_COMMIT;
a931da6a 556 write_unlock(&journal->j_state_lock);
470decc6 557
879c5e6b 558 trace_jbd2_commit_logging(journal, commit_transaction);
bf699327
TT
559 stats.run.rs_logging = jiffies;
560 stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
561 stats.run.rs_logging);
a51dca9c
TT
562 stats.run.rs_blocks =
563 atomic_read(&commit_transaction->t_outstanding_credits);
bf699327 564 stats.run.rs_blocks_logged = 0;
8e85fb3f 565
1dfc3220 566 J_ASSERT(commit_transaction->t_nr_buffers <=
a51dca9c 567 atomic_read(&commit_transaction->t_outstanding_credits));
1dfc3220 568
87c89c23 569 err = 0;
470decc6 570 bufs = 0;
e5a120ae 571 descriptor = NULL;
82f04ab4 572 blk_start_plug(&plug);
470decc6
DK
573 while (commit_transaction->t_buffers) {
574
575 /* Find the next buffer to be journaled... */
576
577 jh = commit_transaction->t_buffers;
578
579 /* If we're in abort mode, we just un-journal the buffer and
7ad7445f 580 release it. */
470decc6
DK
581
582 if (is_journal_aborted(journal)) {
7ad7445f 583 clear_buffer_jbddirty(jh2bh(jh));
470decc6 584 JBUFFER_TRACE(jh, "journal is aborting: refile");
e06c8227
JB
585 jbd2_buffer_abort_trigger(jh,
586 jh->b_frozen_data ?
587 jh->b_frozen_triggers :
588 jh->b_triggers);
f7f4bccb 589 jbd2_journal_refile_buffer(journal, jh);
470decc6
DK
590 /* If that was the last one, we need to clean up
591 * any descriptor buffers which may have been
592 * already allocated, even if we are now
593 * aborting. */
594 if (!commit_transaction->t_buffers)
595 goto start_journal_io;
596 continue;
597 }
598
599 /* Make sure we have a descriptor block in which to
600 record the metadata buffer. */
601
602 if (!descriptor) {
470decc6
DK
603 J_ASSERT (bufs == 0);
604
f2a44523 605 jbd_debug(4, "JBD2: get descriptor\n");
470decc6 606
f7f4bccb 607 descriptor = jbd2_journal_get_descriptor_buffer(journal);
470decc6 608 if (!descriptor) {
a7fa2baf 609 jbd2_journal_abort(journal, -EIO);
470decc6
DK
610 continue;
611 }
612
f2a44523 613 jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
e5a120ae
JK
614 (unsigned long long)descriptor->b_blocknr,
615 descriptor->b_data);
616 header = (journal_header_t *)descriptor->b_data;
f7f4bccb
MC
617 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
618 header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
470decc6
DK
619 header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
620
e5a120ae
JK
621 tagp = &descriptor->b_data[sizeof(journal_header_t)];
622 space_left = descriptor->b_size -
623 sizeof(journal_header_t);
470decc6 624 first_tag = 1;
e5a120ae
JK
625 set_buffer_jwrite(descriptor);
626 set_buffer_dirty(descriptor);
627 wbuf[bufs++] = descriptor;
470decc6
DK
628
629 /* Record it so that we can wait for IO
630 completion later */
e5a120ae
JK
631 BUFFER_TRACE(descriptor, "ph3: file as descriptor");
632 jbd2_file_log_bh(&log_bufs, descriptor);
470decc6
DK
633 }
634
635 /* Where is the buffer to be written? */
636
f7f4bccb 637 err = jbd2_journal_next_log_block(journal, &blocknr);
470decc6
DK
638 /* If the block mapping failed, just abandon the buffer
639 and repeat this loop: we'll fall into the
640 refile-on-abort condition above. */
641 if (err) {
a7fa2baf 642 jbd2_journal_abort(journal, err);
470decc6
DK
643 continue;
644 }
645
646 /*
647 * start_this_handle() uses t_outstanding_credits to determine
648 * the free space in the log, but this counter is changed
f7f4bccb 649 * by jbd2_journal_next_log_block() also.
470decc6 650 */
a51dca9c 651 atomic_dec(&commit_transaction->t_outstanding_credits);
470decc6
DK
652
653 /* Bump b_count to prevent truncate from stumbling over
654 the shadowed buffer! @@@ This can go if we ever get
f5113eff 655 rid of the shadow pairing of buffers. */
470decc6
DK
656 atomic_inc(&jh2bh(jh)->b_count);
657
470decc6 658 /*
f5113eff
JK
659 * Make a temporary IO buffer with which to write it out
660 * (this will requeue the metadata buffer to BJ_Shadow).
470decc6 661 */
f5113eff 662 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
470decc6 663 JBUFFER_TRACE(jh, "ph3: write metadata");
f7f4bccb 664 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
f5113eff 665 jh, &wbuf[bufs], blocknr);
e6ec116b
TT
666 if (flags < 0) {
667 jbd2_journal_abort(journal, flags);
668 continue;
669 }
f5113eff 670 jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
470decc6
DK
671
672 /* Record the new block's tag in the current descriptor
673 buffer */
674
675 tag_flag = 0;
676 if (flags & 1)
f7f4bccb 677 tag_flag |= JBD2_FLAG_ESCAPE;
470decc6 678 if (!first_tag)
f7f4bccb 679 tag_flag |= JBD2_FLAG_SAME_UUID;
470decc6
DK
680
681 tag = (journal_block_tag_t *) tagp;
b517bea1 682 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
8f888ef8 683 tag->t_flags = cpu_to_be16(tag_flag);
f5113eff 684 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
c3900875 685 commit_transaction->t_tid);
b517bea1
ZB
686 tagp += tag_bytes;
687 space_left -= tag_bytes;
f5113eff 688 bufs++;
470decc6
DK
689
690 if (first_tag) {
691 memcpy (tagp, journal->j_uuid, 16);
692 tagp += 16;
693 space_left -= 16;
694 first_tag = 0;
695 }
696
697 /* If there's no more to do, or if the descriptor is full,
698 let the IO rip! */
699
700 if (bufs == journal->j_wbufsize ||
701 commit_transaction->t_buffers == NULL ||
3caa487f 702 space_left < tag_bytes + 16 + csum_size) {
470decc6 703
f2a44523 704 jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
470decc6
DK
705
706 /* Write an end-of-descriptor marker before
707 submitting the IOs. "tag" still points to
708 the last tag we set up. */
709
8f888ef8 710 tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
470decc6 711
3caa487f 712 jbd2_descr_block_csum_set(journal, descriptor);
470decc6
DK
713start_journal_io:
714 for (i = 0; i < bufs; i++) {
715 struct buffer_head *bh = wbuf[i];
818d276c
GS
716 /*
717 * Compute checksum.
718 */
719 if (JBD2_HAS_COMPAT_FEATURE(journal,
720 JBD2_FEATURE_COMPAT_CHECKSUM)) {
721 crc32_sum =
722 jbd2_checksum_data(crc32_sum, bh);
723 }
724
470decc6
DK
725 lock_buffer(bh);
726 clear_buffer_dirty(bh);
727 set_buffer_uptodate(bh);
728 bh->b_end_io = journal_end_buffer_io_sync;
82f04ab4 729 submit_bh(WRITE_SYNC, bh);
470decc6
DK
730 }
731 cond_resched();
bf699327 732 stats.run.rs_blocks_logged += bufs;
470decc6
DK
733
734 /* Force a new descriptor to be generated next
735 time round the loop. */
736 descriptor = NULL;
737 bufs = 0;
738 }
739 }
740
f73bee49
JK
741 err = journal_finish_inode_data_buffers(journal, commit_transaction);
742 if (err) {
743 printk(KERN_WARNING
744 "JBD2: Detected IO errors while flushing file data "
745 "on %s\n", journal->j_devname);
746 if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
747 jbd2_journal_abort(journal, err);
748 err = 0;
749 }
750
3339578f
JK
751 /*
752 * Get current oldest transaction in the log before we issue flush
753 * to the filesystem device. After the flush we can be sure that
754 * blocks of all older transactions are checkpointed to persistent
755 * storage and we will be safe to update journal start in the
756 * superblock with the numbers we get here.
757 */
758 update_tail =
759 jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
760
bbd2be36 761 write_lock(&journal->j_state_lock);
3339578f
JK
762 if (update_tail) {
763 long freed = first_block - journal->j_tail;
764
765 if (first_block < journal->j_tail)
766 freed += journal->j_last - journal->j_first;
767 /* Update tail only if we free significant amount of space */
768 if (freed < journal->j_maxlen / 4)
769 update_tail = 0;
770 }
bbd2be36
JK
771 J_ASSERT(commit_transaction->t_state == T_COMMIT);
772 commit_transaction->t_state = T_COMMIT_DFLUSH;
773 write_unlock(&journal->j_state_lock);
3339578f 774
cc3e1bea
TT
775 /*
776 * If the journal is not located on the file system device,
777 * then we must flush the file system device before we issue
778 * the commit record
779 */
81be12c8 780 if (commit_transaction->t_need_data_flush &&
cc3e1bea
TT
781 (journal->j_fs_dev != journal->j_dev) &&
782 (journal->j_flags & JBD2_BARRIER))
99aa7846 783 blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
818d276c 784
cc3e1bea 785 /* Done it all: now write the commit record asynchronously. */
818d276c 786 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
0e3d2a63 787 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
818d276c
GS
788 err = journal_submit_commit_record(journal, commit_transaction,
789 &cbh, crc32_sum);
790 if (err)
791 __jbd2_journal_abort_hard(journal);
e9e34f4e 792 }
c851ed54 793
82f04ab4
JA
794 blk_finish_plug(&plug);
795
470decc6
DK
796 /* Lo and behold: we have just managed to send a transaction to
797 the log. Before we can commit it, wait for the IO so far to
798 complete. Control buffers being written are on the
799 transaction's t_log_list queue, and metadata buffers are on
f5113eff 800 the io_bufs list.
470decc6
DK
801
802 Wait for the buffers in reverse order. That way we are
803 less likely to be woken up until all IOs have completed, and
804 so we incur less scheduling load.
805 */
806
f2a44523 807 jbd_debug(3, "JBD2: commit phase 3\n");
470decc6 808
f5113eff
JK
809 while (!list_empty(&io_bufs)) {
810 struct buffer_head *bh = list_entry(io_bufs.prev,
811 struct buffer_head,
812 b_assoc_buffers);
470decc6 813
f5113eff
JK
814 wait_on_buffer(bh);
815 cond_resched();
470decc6
DK
816
817 if (unlikely(!buffer_uptodate(bh)))
818 err = -EIO;
f5113eff 819 jbd2_unfile_log_bh(bh);
470decc6
DK
820
821 /*
f5113eff
JK
822 * The list contains temporary buffer heads created by
823 * jbd2_journal_write_metadata_buffer().
470decc6
DK
824 */
825 BUFFER_TRACE(bh, "dumping temporary bh");
470decc6
DK
826 __brelse(bh);
827 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
828 free_buffer_head(bh);
829
f5113eff 830 /* We also have to refile the corresponding shadowed buffer */
470decc6
DK
831 jh = commit_transaction->t_shadow_list->b_tprev;
832 bh = jh2bh(jh);
f5113eff 833 clear_buffer_jwrite(bh);
470decc6
DK
834 J_ASSERT_BH(bh, buffer_jbddirty(bh));
835
836 /* The metadata is now released for reuse, but we need
837 to remember it against this transaction so that when
838 we finally commit, we can do any checkpointing
839 required. */
840 JBUFFER_TRACE(jh, "file as BJ_Forget");
f7f4bccb 841 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
229309ca
JK
842 /*
843 * Wake up any transactions which were waiting for this IO to
844 * complete. The barrier must be here so that changes by
845 * jbd2_journal_file_buffer() take effect before wake_up_bit()
846 * does the waitqueue check.
847 */
848 smp_mb();
470decc6
DK
849 wake_up_bit(&bh->b_state, BH_Unshadow);
850 JBUFFER_TRACE(jh, "brelse shadowed buffer");
851 __brelse(bh);
852 }
853
854 J_ASSERT (commit_transaction->t_shadow_list == NULL);
855
f2a44523 856 jbd_debug(3, "JBD2: commit phase 4\n");
470decc6
DK
857
858 /* Here we wait for the revoke record and descriptor record buffers */
e5a120ae 859 while (!list_empty(&log_bufs)) {
470decc6
DK
860 struct buffer_head *bh;
861
e5a120ae
JK
862 bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
863 wait_on_buffer(bh);
864 cond_resched();
470decc6
DK
865
866 if (unlikely(!buffer_uptodate(bh)))
867 err = -EIO;
868
869 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
870 clear_buffer_jwrite(bh);
e5a120ae 871 jbd2_unfile_log_bh(bh);
470decc6
DK
872 __brelse(bh); /* One for getblk */
873 /* AKPM: bforget here */
874 }
875
77e841de
HK
876 if (err)
877 jbd2_journal_abort(journal, err);
878
f2a44523 879 jbd_debug(3, "JBD2: commit phase 5\n");
bbd2be36
JK
880 write_lock(&journal->j_state_lock);
881 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
882 commit_transaction->t_state = T_COMMIT_JFLUSH;
883 write_unlock(&journal->j_state_lock);
470decc6 884
818d276c 885 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
0e3d2a63 886 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
818d276c
GS
887 err = journal_submit_commit_record(journal, commit_transaction,
888 &cbh, crc32_sum);
889 if (err)
890 __jbd2_journal_abort_hard(journal);
891 }
6cba611e 892 if (cbh)
fd98496f 893 err = journal_wait_on_commit_record(journal, cbh);
f73bee49
JK
894 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
895 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
896 journal->j_flags & JBD2_BARRIER) {
99aa7846 897 blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
f73bee49 898 }
470decc6
DK
899
900 if (err)
a7fa2baf 901 jbd2_journal_abort(journal, err);
470decc6 902
3339578f
JK
903 /*
904 * Now disk caches for filesystem device are flushed so we are safe to
905 * erase checkpointed transactions from the log by updating journal
906 * superblock.
907 */
908 if (update_tail)
909 jbd2_update_log_tail(journal, first_tid, first_block);
910
470decc6
DK
911 /* End of a transaction! Finally, we can do checkpoint
912 processing: any buffers committed as a result of this
913 transaction can be removed from any checkpoint list it was on
914 before. */
915
f2a44523 916 jbd_debug(3, "JBD2: commit phase 6\n");
470decc6 917
c851ed54 918 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
470decc6
DK
919 J_ASSERT(commit_transaction->t_buffers == NULL);
920 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
470decc6 921 J_ASSERT(commit_transaction->t_shadow_list == NULL);
470decc6
DK
922
923restart_loop:
924 /*
925 * As there are other places (journal_unmap_buffer()) adding buffers
926 * to this list we have to be careful and hold the j_list_lock.
927 */
928 spin_lock(&journal->j_list_lock);
929 while (commit_transaction->t_forget) {
930 transaction_t *cp_transaction;
931 struct buffer_head *bh;
de1b7941 932 int try_to_free = 0;
470decc6
DK
933
934 jh = commit_transaction->t_forget;
935 spin_unlock(&journal->j_list_lock);
936 bh = jh2bh(jh);
de1b7941
JK
937 /*
938 * Get a reference so that bh cannot be freed before we are
939 * done with it.
940 */
941 get_bh(bh);
470decc6 942 jbd_lock_bh_state(bh);
23e2af35 943 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
470decc6
DK
944
945 /*
946 * If there is undo-protected committed data against
947 * this buffer, then we can remove it now. If it is a
948 * buffer needing such protection, the old frozen_data
949 * field now points to a committed version of the
950 * buffer, so rotate that field to the new committed
951 * data.
952 *
953 * Otherwise, we can just throw away the frozen data now.
e06c8227
JB
954 *
955 * We also know that the frozen data has already fired
956 * its triggers if they exist, so we can clear that too.
470decc6
DK
957 */
958 if (jh->b_committed_data) {
af1e76d6 959 jbd2_free(jh->b_committed_data, bh->b_size);
470decc6
DK
960 jh->b_committed_data = NULL;
961 if (jh->b_frozen_data) {
962 jh->b_committed_data = jh->b_frozen_data;
963 jh->b_frozen_data = NULL;
e06c8227 964 jh->b_frozen_triggers = NULL;
470decc6
DK
965 }
966 } else if (jh->b_frozen_data) {
af1e76d6 967 jbd2_free(jh->b_frozen_data, bh->b_size);
470decc6 968 jh->b_frozen_data = NULL;
e06c8227 969 jh->b_frozen_triggers = NULL;
470decc6
DK
970 }
971
972 spin_lock(&journal->j_list_lock);
973 cp_transaction = jh->b_cp_transaction;
974 if (cp_transaction) {
975 JBUFFER_TRACE(jh, "remove from old cp transaction");
8e85fb3f 976 cp_transaction->t_chp_stats.cs_dropped++;
f7f4bccb 977 __jbd2_journal_remove_checkpoint(jh);
470decc6
DK
978 }
979
980 /* Only re-checkpoint the buffer_head if it is marked
981 * dirty. If the buffer was added to the BJ_Forget list
f7f4bccb 982 * by jbd2_journal_forget, it may no longer be dirty and
470decc6
DK
983 * there's no point in keeping a checkpoint record for
984 * it. */
985
b794e7a6
JK
986 /*
987 * A buffer which has been freed while still being journaled by
988 * a previous transaction.
989 */
990 if (buffer_freed(bh)) {
991 /*
992 * If the running transaction is the one containing
993 * "add to orphan" operation (b_next_transaction !=
994 * NULL), we have to wait for that transaction to
995 * commit before we can really get rid of the buffer.
996 * So just clear b_modified to not confuse transaction
997 * credit accounting and refile the buffer to
998 * BJ_Forget of the running transaction. If the just
999 * committed transaction contains "add to orphan"
1000 * operation, we can completely invalidate the buffer
1001 * now. We are rather through in that since the
1002 * buffer may be still accessible when blocksize <
1003 * pagesize and it is attached to the last partial
1004 * page.
1005 */
1006 jh->b_modified = 0;
1007 if (!jh->b_next_transaction) {
1008 clear_buffer_freed(bh);
1009 clear_buffer_jbddirty(bh);
1010 clear_buffer_mapped(bh);
1011 clear_buffer_new(bh);
1012 clear_buffer_req(bh);
1013 bh->b_bdev = NULL;
1014 }
470decc6
DK
1015 }
1016
1017 if (buffer_jbddirty(bh)) {
1018 JBUFFER_TRACE(jh, "add to new checkpointing trans");
f7f4bccb 1019 __jbd2_journal_insert_checkpoint(jh, commit_transaction);
7ad7445f
HK
1020 if (is_journal_aborted(journal))
1021 clear_buffer_jbddirty(bh);
470decc6
DK
1022 } else {
1023 J_ASSERT_BH(bh, !buffer_dirty(bh));
de1b7941
JK
1024 /*
1025 * The buffer on BJ_Forget list and not jbddirty means
470decc6
DK
1026 * it has been freed by this transaction and hence it
1027 * could not have been reallocated until this
1028 * transaction has committed. *BUT* it could be
1029 * reallocated once we have written all the data to
1030 * disk and before we process the buffer on BJ_Forget
de1b7941
JK
1031 * list.
1032 */
1033 if (!jh->b_next_transaction)
1034 try_to_free = 1;
470decc6 1035 }
de1b7941
JK
1036 JBUFFER_TRACE(jh, "refile or unfile buffer");
1037 __jbd2_journal_refile_buffer(jh);
1038 jbd_unlock_bh_state(bh);
1039 if (try_to_free)
1040 release_buffer_page(bh); /* Drops bh reference */
1041 else
1042 __brelse(bh);
470decc6
DK
1043 cond_resched_lock(&journal->j_list_lock);
1044 }
1045 spin_unlock(&journal->j_list_lock);
1046 /*
f5a7a6b0
JK
1047 * This is a bit sleazy. We use j_list_lock to protect transition
1048 * of a transaction into T_FINISHED state and calling
1049 * __jbd2_journal_drop_transaction(). Otherwise we could race with
1050 * other checkpointing code processing the transaction...
470decc6 1051 */
a931da6a 1052 write_lock(&journal->j_state_lock);
470decc6
DK
1053 spin_lock(&journal->j_list_lock);
1054 /*
1055 * Now recheck if some buffers did not get attached to the transaction
1056 * while the lock was dropped...
1057 */
1058 if (commit_transaction->t_forget) {
1059 spin_unlock(&journal->j_list_lock);
a931da6a 1060 write_unlock(&journal->j_state_lock);
470decc6
DK
1061 goto restart_loop;
1062 }
1063
1064 /* Done with this transaction! */
1065
f2a44523 1066 jbd_debug(3, "JBD2: commit phase 7\n");
470decc6 1067
bbd2be36 1068 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
470decc6 1069
8e85fb3f 1070 commit_transaction->t_start = jiffies;
bf699327
TT
1071 stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
1072 commit_transaction->t_start);
8e85fb3f
JL
1073
1074 /*
bf699327 1075 * File the transaction statistics
8e85fb3f 1076 */
8e85fb3f 1077 stats.ts_tid = commit_transaction->t_tid;
8dd42046
TT
1078 stats.run.rs_handle_count =
1079 atomic_read(&commit_transaction->t_handle_count);
bf699327
TT
1080 trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
1081 commit_transaction->t_tid, &stats.run);
8e85fb3f
JL
1082
1083 /*
1084 * Calculate overall stats
1085 */
bf699327 1086 spin_lock(&journal->j_history_lock);
8e85fb3f 1087 journal->j_stats.ts_tid++;
9fff24aa
TT
1088 if (commit_transaction->t_requested)
1089 journal->j_stats.ts_requested++;
bf699327 1090 journal->j_stats.run.rs_wait += stats.run.rs_wait;
9fff24aa 1091 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
bf699327
TT
1092 journal->j_stats.run.rs_running += stats.run.rs_running;
1093 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1094 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
1095 journal->j_stats.run.rs_logging += stats.run.rs_logging;
1096 journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
1097 journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
1098 journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
8e85fb3f
JL
1099 spin_unlock(&journal->j_history_lock);
1100
794446c6 1101 commit_transaction->t_state = T_COMMIT_CALLBACK;
470decc6
DK
1102 J_ASSERT(commit_transaction == journal->j_committing_transaction);
1103 journal->j_commit_sequence = commit_transaction->t_tid;
1104 journal->j_committing_transaction = NULL;
e07f7183 1105 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
470decc6 1106
e07f7183
JB
1107 /*
1108 * weight the commit time higher than the average time so we don't
1109 * react too strongly to vast changes in the commit time
1110 */
1111 if (likely(journal->j_average_commit_time))
1112 journal->j_average_commit_time = (commit_time +
1113 journal->j_average_commit_time*3) / 4;
1114 else
1115 journal->j_average_commit_time = commit_time;
794446c6 1116
a931da6a 1117 write_unlock(&journal->j_state_lock);
6c20ec85 1118
794446c6
DM
1119 if (journal->j_checkpoint_transactions == NULL) {
1120 journal->j_checkpoint_transactions = commit_transaction;
1121 commit_transaction->t_cpnext = commit_transaction;
1122 commit_transaction->t_cpprev = commit_transaction;
470decc6 1123 } else {
794446c6
DM
1124 commit_transaction->t_cpnext =
1125 journal->j_checkpoint_transactions;
1126 commit_transaction->t_cpprev =
1127 commit_transaction->t_cpnext->t_cpprev;
1128 commit_transaction->t_cpnext->t_cpprev =
1129 commit_transaction;
1130 commit_transaction->t_cpprev->t_cpnext =
470decc6 1131 commit_transaction;
470decc6
DK
1132 }
1133 spin_unlock(&journal->j_list_lock);
794446c6
DM
1134 /* Drop all spin_locks because commit_callback may be block.
1135 * __journal_remove_checkpoint() can not destroy transaction
1136 * under us because it is not marked as T_FINISHED yet */
fb68407b
AK
1137 if (journal->j_commit_callback)
1138 journal->j_commit_callback(journal, commit_transaction);
1139
879c5e6b 1140 trace_jbd2_end_commit(journal, commit_transaction);
f2a44523 1141 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
470decc6
DK
1142 journal->j_commit_sequence, journal->j_tail_sequence);
1143
794446c6
DM
1144 write_lock(&journal->j_state_lock);
1145 spin_lock(&journal->j_list_lock);
1146 commit_transaction->t_state = T_FINISHED;
1147 /* Recheck checkpoint lists after j_list_lock was dropped */
1148 if (commit_transaction->t_checkpoint_list == NULL &&
1149 commit_transaction->t_checkpoint_io_list == NULL) {
1150 __jbd2_journal_drop_transaction(journal, commit_transaction);
1151 jbd2_journal_free_transaction(commit_transaction);
1152 }
1153 spin_unlock(&journal->j_list_lock);
1154 write_unlock(&journal->j_state_lock);
470decc6
DK
1155 wake_up(&journal->j_wait_done_commit);
1156}
This page took 0.541375 seconds and 5 git commands to generate.