2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
25 #include "transaction.h"
/*
 * File-scope state for the transaction code.
 *
 * total_trans is sanity-checked with WARN_ON() when a transaction is
 * released; presumably it counts allocated transactions (confirm).
 */
static int total_trans = 0;

/* Slab caches for handles and transactions; defined outside this file. */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* Workqueue on which the delayed transaction work is queued. */
static struct workqueue_struct *trans_wq;

/* Radix-tree tags applied to entries of fs_info->fs_roots_radix. */
#define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
36 static void put_transaction(struct btrfs_transaction
*transaction
)
38 WARN_ON(transaction
->use_count
== 0);
39 transaction
->use_count
--;
40 if (transaction
->use_count
== 0) {
41 WARN_ON(total_trans
== 0);
43 list_del_init(&transaction
->list
);
44 memset(transaction
, 0, sizeof(*transaction
));
45 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
49 static int join_transaction(struct btrfs_root
*root
)
51 struct btrfs_transaction
*cur_trans
;
52 cur_trans
= root
->fs_info
->running_transaction
;
54 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
58 root
->fs_info
->generation
++;
59 root
->fs_info
->running_transaction
= cur_trans
;
60 cur_trans
->num_writers
= 1;
61 cur_trans
->num_joined
= 0;
62 cur_trans
->transid
= root
->fs_info
->generation
;
63 init_waitqueue_head(&cur_trans
->writer_wait
);
64 init_waitqueue_head(&cur_trans
->commit_wait
);
65 cur_trans
->in_commit
= 0;
66 cur_trans
->use_count
= 1;
67 cur_trans
->commit_done
= 0;
68 cur_trans
->start_time
= get_seconds();
69 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
70 extent_map_tree_init(&cur_trans
->dirty_pages
,
71 root
->fs_info
->btree_inode
->i_mapping
,
74 cur_trans
->num_writers
++;
75 cur_trans
->num_joined
++;
81 static int record_root_in_trans(struct btrfs_root
*root
)
83 u64 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
84 if (root
->ref_cows
&& root
->last_trans
< running_trans_id
) {
85 WARN_ON(root
== root
->fs_info
->extent_root
);
86 if (root
->root_item
.refs
!= 0) {
87 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
88 (unsigned long)root
->root_key
.objectid
,
89 BTRFS_ROOT_TRANS_TAG
);
90 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
91 (unsigned long)root
->root_key
.objectid
,
92 BTRFS_ROOT_DEFRAG_TAG
);
93 root
->commit_root
= root
->node
;
94 extent_buffer_get(root
->node
);
98 root
->last_trans
= running_trans_id
;
103 struct btrfs_trans_handle
*btrfs_start_transaction(struct btrfs_root
*root
,
106 struct btrfs_trans_handle
*h
=
107 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
110 mutex_lock(&root
->fs_info
->trans_mutex
);
111 ret
= join_transaction(root
);
114 record_root_in_trans(root
);
115 h
->transid
= root
->fs_info
->running_transaction
->transid
;
116 h
->transaction
= root
->fs_info
->running_transaction
;
117 h
->blocks_reserved
= num_blocks
;
119 h
->block_group
= NULL
;
120 h
->alloc_exclude_nr
= 0;
121 h
->alloc_exclude_start
= 0;
122 root
->fs_info
->running_transaction
->use_count
++;
123 mutex_unlock(&root
->fs_info
->trans_mutex
);
127 int btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
128 struct btrfs_root
*root
)
130 struct btrfs_transaction
*cur_trans
;
132 mutex_lock(&root
->fs_info
->trans_mutex
);
133 cur_trans
= root
->fs_info
->running_transaction
;
134 WARN_ON(cur_trans
!= trans
->transaction
);
135 WARN_ON(cur_trans
->num_writers
< 1);
136 cur_trans
->num_writers
--;
137 if (waitqueue_active(&cur_trans
->writer_wait
))
138 wake_up(&cur_trans
->writer_wait
);
139 put_transaction(cur_trans
);
140 mutex_unlock(&root
->fs_info
->trans_mutex
);
141 memset(trans
, 0, sizeof(*trans
));
142 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
147 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
148 struct btrfs_root
*root
)
153 struct extent_map_tree
*dirty_pages
;
155 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
160 if (!trans
|| !trans
->transaction
) {
161 return filemap_write_and_wait(btree_inode
->i_mapping
);
163 dirty_pages
= &trans
->transaction
->dirty_pages
;
165 ret
= find_first_extent_bit(dirty_pages
, 0, &start
, &end
,
169 clear_extent_dirty(dirty_pages
, start
, end
, GFP_NOFS
);
170 while(start
<= end
) {
171 index
= start
>> PAGE_CACHE_SHIFT
;
172 start
= (index
+ 1) << PAGE_CACHE_SHIFT
;
173 page
= find_lock_page(btree_inode
->i_mapping
, index
);
176 if (PageWriteback(page
)) {
178 wait_on_page_writeback(page
);
181 page_cache_release(page
);
185 err
= write_one_page(page
, 0);
188 page_cache_release(page
);
191 err
= filemap_fdatawait(btree_inode
->i_mapping
);
197 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
198 struct btrfs_root
*root
)
201 u64 old_extent_block
;
202 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
203 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
204 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
206 btrfs_write_dirty_block_groups(trans
, extent_root
);
208 old_extent_block
= btrfs_root_blocknr(&extent_root
->root_item
);
209 if (old_extent_block
==
210 extent_buffer_blocknr(extent_root
->node
))
212 btrfs_set_root_blocknr(&extent_root
->root_item
,
213 extent_buffer_blocknr(extent_root
->node
));
214 ret
= btrfs_update_root(trans
, tree_root
,
215 &extent_root
->root_key
,
216 &extent_root
->root_item
);
218 btrfs_write_dirty_block_groups(trans
, extent_root
);
223 static int wait_for_commit(struct btrfs_root
*root
,
224 struct btrfs_transaction
*commit
)
227 mutex_lock(&root
->fs_info
->trans_mutex
);
228 while(!commit
->commit_done
) {
229 prepare_to_wait(&commit
->commit_wait
, &wait
,
230 TASK_UNINTERRUPTIBLE
);
231 if (commit
->commit_done
)
233 mutex_unlock(&root
->fs_info
->trans_mutex
);
235 mutex_lock(&root
->fs_info
->trans_mutex
);
237 mutex_unlock(&root
->fs_info
->trans_mutex
);
238 finish_wait(&commit
->commit_wait
, &wait
);
243 struct list_head list
;
244 struct btrfs_root
*root
;
245 struct btrfs_root
*latest_root
;
248 int btrfs_add_dead_root(struct btrfs_root
*root
,
249 struct btrfs_root
*latest
,
250 struct list_head
*dead_list
)
252 struct dirty_root
*dirty
;
254 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
258 dirty
->latest_root
= latest
;
259 list_add(&dirty
->list
, dead_list
);
263 static int add_dirty_roots(struct btrfs_trans_handle
*trans
,
264 struct radix_tree_root
*radix
,
265 struct list_head
*list
)
267 struct dirty_root
*dirty
;
268 struct btrfs_root
*gang
[8];
269 struct btrfs_root
*root
;
276 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
278 BTRFS_ROOT_TRANS_TAG
);
281 for (i
= 0; i
< ret
; i
++) {
283 radix_tree_tag_clear(radix
,
284 (unsigned long)root
->root_key
.objectid
,
285 BTRFS_ROOT_TRANS_TAG
);
286 if (root
->commit_root
== root
->node
) {
287 WARN_ON(extent_buffer_blocknr(root
->node
) !=
288 btrfs_root_blocknr(&root
->root_item
));
289 free_extent_buffer(root
->commit_root
);
290 root
->commit_root
= NULL
;
292 /* make sure to update the root on disk
293 * so we get any updates to the block used
296 err
= btrfs_update_root(trans
,
297 root
->fs_info
->tree_root
,
302 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
304 dirty
->root
= kmalloc(sizeof(*dirty
->root
), GFP_NOFS
);
305 BUG_ON(!dirty
->root
);
307 memset(&root
->root_item
.drop_progress
, 0,
308 sizeof(struct btrfs_disk_key
));
309 root
->root_item
.drop_level
= 0;
311 memcpy(dirty
->root
, root
, sizeof(*root
));
312 dirty
->root
->node
= root
->commit_root
;
313 dirty
->latest_root
= root
;
314 root
->commit_root
= NULL
;
316 root
->root_key
.offset
= root
->fs_info
->generation
;
317 btrfs_set_root_blocknr(&root
->root_item
,
318 extent_buffer_blocknr(root
->node
));
319 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
325 refs
= btrfs_root_refs(&dirty
->root
->root_item
);
326 btrfs_set_root_refs(&dirty
->root
->root_item
, refs
- 1);
327 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
328 &dirty
->root
->root_key
,
329 &dirty
->root
->root_item
);
333 list_add(&dirty
->list
, list
);
344 int btrfs_defrag_root(struct btrfs_root
*root
, int cacheonly
)
346 struct btrfs_fs_info
*info
= root
->fs_info
;
348 struct btrfs_trans_handle
*trans
;
351 if (root
->defrag_running
)
354 trans
= btrfs_start_transaction(root
, 1);
356 root
->defrag_running
= 1;
357 ret
= btrfs_defrag_leaves(trans
, root
, cacheonly
);
358 nr
= trans
->blocks_used
;
359 btrfs_end_transaction(trans
, root
);
360 mutex_unlock(&info
->fs_mutex
);
362 btrfs_btree_balance_dirty(info
->tree_root
, nr
);
365 mutex_lock(&info
->fs_mutex
);
366 trans
= btrfs_start_transaction(root
, 1);
370 root
->defrag_running
= 0;
371 radix_tree_tag_clear(&info
->fs_roots_radix
,
372 (unsigned long)root
->root_key
.objectid
,
373 BTRFS_ROOT_DEFRAG_TAG
);
374 btrfs_end_transaction(trans
, root
);
378 int btrfs_defrag_dirty_roots(struct btrfs_fs_info
*info
)
380 struct btrfs_root
*gang
[1];
381 struct btrfs_root
*root
;
388 ret
= radix_tree_gang_lookup_tag(&info
->fs_roots_radix
,
391 BTRFS_ROOT_DEFRAG_TAG
);
394 for (i
= 0; i
< ret
; i
++) {
396 last
= root
->root_key
.objectid
+ 1;
397 btrfs_defrag_root(root
, 1);
400 // btrfs_defrag_root(info->extent_root, 1);
404 static int drop_dirty_roots(struct btrfs_root
*tree_root
,
405 struct list_head
*list
)
407 struct dirty_root
*dirty
;
408 struct btrfs_trans_handle
*trans
;
415 while(!list_empty(list
)) {
416 struct btrfs_root
*root
;
418 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
419 dirty
= list_entry(list
->next
, struct dirty_root
, list
);
420 list_del_init(&dirty
->list
);
422 num_blocks
= btrfs_root_used(&dirty
->root
->root_item
);
423 root
= dirty
->latest_root
;
426 trans
= btrfs_start_transaction(tree_root
, 1);
427 ret
= btrfs_drop_snapshot(trans
, dirty
->root
);
428 if (ret
!= -EAGAIN
) {
432 err
= btrfs_update_root(trans
,
434 &dirty
->root
->root_key
,
435 &dirty
->root
->root_item
);
438 nr
= trans
->blocks_used
;
439 ret
= btrfs_end_transaction(trans
, tree_root
);
441 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
442 btrfs_btree_balance_dirty(tree_root
, nr
);
445 mutex_lock(&tree_root
->fs_info
->fs_mutex
);
449 num_blocks
-= btrfs_root_used(&dirty
->root
->root_item
);
450 blocks_used
= btrfs_root_used(&root
->root_item
);
452 record_root_in_trans(root
);
453 btrfs_set_root_used(&root
->root_item
,
454 blocks_used
- num_blocks
);
456 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->root
->root_key
);
461 nr
= trans
->blocks_used
;
462 ret
= btrfs_end_transaction(trans
, tree_root
);
465 free_extent_buffer(dirty
->root
->node
);
468 mutex_unlock(&tree_root
->fs_info
->fs_mutex
);
470 btrfs_btree_balance_dirty(tree_root
, nr
);
476 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
477 struct btrfs_root
*root
)
479 unsigned long joined
= 0;
480 unsigned long timeout
= 1;
481 struct btrfs_transaction
*cur_trans
;
482 struct btrfs_transaction
*prev_trans
= NULL
;
483 struct list_head dirty_fs_roots
;
484 struct radix_tree_root pinned_copy
;
488 init_bit_radix(&pinned_copy
);
489 INIT_LIST_HEAD(&dirty_fs_roots
);
491 mutex_lock(&root
->fs_info
->trans_mutex
);
492 if (trans
->transaction
->in_commit
) {
493 cur_trans
= trans
->transaction
;
494 trans
->transaction
->use_count
++;
495 mutex_unlock(&root
->fs_info
->trans_mutex
);
496 btrfs_end_transaction(trans
, root
);
498 mutex_unlock(&root
->fs_info
->fs_mutex
);
499 ret
= wait_for_commit(root
, cur_trans
);
502 mutex_lock(&root
->fs_info
->trans_mutex
);
503 put_transaction(cur_trans
);
504 mutex_unlock(&root
->fs_info
->trans_mutex
);
506 mutex_lock(&root
->fs_info
->fs_mutex
);
509 trans
->transaction
->in_commit
= 1;
510 cur_trans
= trans
->transaction
;
511 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
512 prev_trans
= list_entry(cur_trans
->list
.prev
,
513 struct btrfs_transaction
, list
);
514 if (!prev_trans
->commit_done
) {
515 prev_trans
->use_count
++;
516 mutex_unlock(&root
->fs_info
->fs_mutex
);
517 mutex_unlock(&root
->fs_info
->trans_mutex
);
519 wait_for_commit(root
, prev_trans
);
521 mutex_lock(&root
->fs_info
->fs_mutex
);
522 mutex_lock(&root
->fs_info
->trans_mutex
);
523 put_transaction(prev_trans
);
528 joined
= cur_trans
->num_joined
;
529 WARN_ON(cur_trans
!= trans
->transaction
);
530 prepare_to_wait(&cur_trans
->writer_wait
, &wait
,
531 TASK_UNINTERRUPTIBLE
);
533 if (cur_trans
->num_writers
> 1)
534 timeout
= MAX_SCHEDULE_TIMEOUT
;
538 mutex_unlock(&root
->fs_info
->fs_mutex
);
539 mutex_unlock(&root
->fs_info
->trans_mutex
);
541 schedule_timeout(timeout
);
543 mutex_lock(&root
->fs_info
->fs_mutex
);
544 mutex_lock(&root
->fs_info
->trans_mutex
);
545 finish_wait(&cur_trans
->writer_wait
, &wait
);
546 } while (cur_trans
->num_writers
> 1 ||
547 (cur_trans
->num_joined
!= joined
));
549 WARN_ON(cur_trans
!= trans
->transaction
);
550 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
554 ret
= btrfs_commit_tree_roots(trans
, root
);
557 cur_trans
= root
->fs_info
->running_transaction
;
558 root
->fs_info
->running_transaction
= NULL
;
559 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
561 btrfs_set_super_root(&root
->fs_info
->super_copy
,
562 extent_buffer_blocknr(root
->fs_info
->tree_root
->node
));
564 write_extent_buffer(root
->fs_info
->sb_buffer
,
565 &root
->fs_info
->super_copy
, 0,
566 sizeof(root
->fs_info
->super_copy
));
568 btrfs_copy_pinned(root
, &pinned_copy
);
570 mutex_unlock(&root
->fs_info
->trans_mutex
);
571 mutex_unlock(&root
->fs_info
->fs_mutex
);
572 ret
= btrfs_write_and_wait_transaction(trans
, root
);
574 write_ctree_super(trans
, root
);
575 mutex_lock(&root
->fs_info
->fs_mutex
);
576 btrfs_finish_extent_commit(trans
, root
, &pinned_copy
);
577 mutex_lock(&root
->fs_info
->trans_mutex
);
578 cur_trans
->commit_done
= 1;
579 root
->fs_info
->last_trans_committed
= cur_trans
->transid
;
580 wake_up(&cur_trans
->commit_wait
);
581 put_transaction(cur_trans
);
582 put_transaction(cur_trans
);
584 if (root
->fs_info
->closing
)
585 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
587 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
589 mutex_unlock(&root
->fs_info
->trans_mutex
);
590 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
592 if (root
->fs_info
->closing
) {
593 mutex_unlock(&root
->fs_info
->fs_mutex
);
594 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
595 mutex_lock(&root
->fs_info
->fs_mutex
);
600 int btrfs_clean_old_snapshots(struct btrfs_root
*root
)
602 struct list_head dirty_roots
;
603 INIT_LIST_HEAD(&dirty_roots
);
605 mutex_lock(&root
->fs_info
->trans_mutex
);
606 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
607 mutex_unlock(&root
->fs_info
->trans_mutex
);
609 if (!list_empty(&dirty_roots
)) {
610 drop_dirty_roots(root
, &dirty_roots
);
614 void btrfs_transaction_cleaner(struct work_struct
*work
)
616 struct btrfs_fs_info
*fs_info
= container_of(work
,
617 struct btrfs_fs_info
,
620 struct btrfs_root
*root
= fs_info
->tree_root
;
621 struct btrfs_transaction
*cur
;
622 struct btrfs_trans_handle
*trans
;
624 unsigned long delay
= HZ
* 30;
627 mutex_lock(&root
->fs_info
->fs_mutex
);
628 mutex_lock(&root
->fs_info
->trans_mutex
);
629 cur
= root
->fs_info
->running_transaction
;
631 mutex_unlock(&root
->fs_info
->trans_mutex
);
635 if (now
< cur
->start_time
|| now
- cur
->start_time
< 30) {
636 mutex_unlock(&root
->fs_info
->trans_mutex
);
640 mutex_unlock(&root
->fs_info
->trans_mutex
);
641 btrfs_defrag_dirty_roots(root
->fs_info
);
642 trans
= btrfs_start_transaction(root
, 1);
643 ret
= btrfs_commit_transaction(trans
, root
);
645 mutex_unlock(&root
->fs_info
->fs_mutex
);
646 btrfs_clean_old_snapshots(root
);
647 btrfs_transaction_queue_work(root
, delay
);
650 void btrfs_transaction_queue_work(struct btrfs_root
*root
, int delay
)
652 queue_delayed_work(trans_wq
, &root
->fs_info
->trans_work
, delay
);
655 void btrfs_transaction_flush_work(struct btrfs_root
*root
)
657 cancel_rearming_delayed_workqueue(trans_wq
, &root
->fs_info
->trans_work
);
658 flush_workqueue(trans_wq
);
661 void __init
btrfs_init_transaction_sys(void)
663 trans_wq
= create_workqueue("btrfs");
666 void __exit
btrfs_exit_transaction_sys(void)
668 destroy_workqueue(trans_wq
);