2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/sched.h>
22 #include "print-tree.h"
23 #include "transaction.h"
25 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
26 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
27 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
29 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
30 btrfs_root
*extent_root
);
31 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
32 btrfs_root
*extent_root
);
34 static int cache_block_group(struct btrfs_root
*root
,
35 struct btrfs_block_group_cache
*block_group
)
37 struct btrfs_path
*path
;
40 struct extent_buffer
*leaf
;
41 struct extent_map_tree
*free_space_cache
;
48 root
= root
->fs_info
->extent_root
;
49 free_space_cache
= &root
->fs_info
->free_space_cache
;
51 if (block_group
->cached
)
54 path
= btrfs_alloc_path();
59 first_free
= block_group
->key
.objectid
;
60 key
.objectid
= block_group
->key
.objectid
;
63 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
64 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
69 if (ret
&& path
->slots
[0] > 0)
73 leaf
= path
->nodes
[0];
74 slot
= path
->slots
[0];
75 if (slot
>= btrfs_header_nritems(leaf
)) {
76 ret
= btrfs_next_leaf(root
, path
);
86 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
87 if (key
.objectid
< block_group
->key
.objectid
) {
88 if (key
.objectid
+ key
.offset
> first_free
)
89 first_free
= key
.objectid
+ key
.offset
;
93 if (key
.objectid
>= block_group
->key
.objectid
+
94 block_group
->key
.offset
) {
98 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
103 if (key
.objectid
> last
) {
104 hole_size
= key
.objectid
- last
;
105 set_extent_dirty(free_space_cache
, last
,
106 last
+ hole_size
- 1,
109 last
= key
.objectid
+ key
.offset
;
117 if (block_group
->key
.objectid
+
118 block_group
->key
.offset
> last
) {
119 hole_size
= block_group
->key
.objectid
+
120 block_group
->key
.offset
- last
;
121 set_extent_dirty(free_space_cache
, last
,
122 last
+ hole_size
- 1, GFP_NOFS
);
124 block_group
->cached
= 1;
126 btrfs_free_path(path
);
130 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
134 struct extent_map_tree
*block_group_cache
;
135 struct btrfs_block_group_cache
*block_group
= NULL
;
141 block_group_cache
= &info
->block_group_cache
;
142 ret
= find_first_extent_bit(block_group_cache
,
143 bytenr
, &start
, &end
,
144 BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
);
148 ret
= get_state_private(block_group_cache
, start
, &ptr
);
152 block_group
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
153 if (block_group
->key
.objectid
<= bytenr
&& bytenr
<
154 block_group
->key
.objectid
+ block_group
->key
.offset
)
158 static u64
find_search_start(struct btrfs_root
*root
,
159 struct btrfs_block_group_cache
**cache_ret
,
160 u64 search_start
, int num
,
161 int data
, int full_scan
)
164 struct btrfs_block_group_cache
*cache
= *cache_ret
;
172 ret
= cache_block_group(root
, cache
);
176 last
= max(search_start
, cache
->key
.objectid
);
179 ret
= find_first_extent_bit(&root
->fs_info
->free_space_cache
,
180 last
, &start
, &end
, EXTENT_DIRTY
);
187 start
= max(last
, start
);
189 if (last
- start
< num
) {
190 if (last
== cache
->key
.objectid
+ cache
->key
.offset
)
194 if (data
!= BTRFS_BLOCK_GROUP_MIXED
&&
195 start
+ num
> cache
->key
.objectid
+ cache
->key
.offset
)
203 last
= cache
->key
.objectid
+ cache
->key
.offset
;
205 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
211 data
= BTRFS_BLOCK_GROUP_MIXED
;
216 if (cache_miss
&& !cache
->cached
) {
217 cache_block_group(root
, cache
);
219 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
222 cache
= btrfs_find_block_group(root
, cache
, last
, data
, 0);
230 static u64
div_factor(u64 num
, int factor
)
239 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
240 struct btrfs_block_group_cache
241 *hint
, u64 search_start
,
244 struct btrfs_block_group_cache
*cache
;
245 struct extent_map_tree
*block_group_cache
;
246 struct btrfs_block_group_cache
*found_group
= NULL
;
247 struct btrfs_fs_info
*info
= root
->fs_info
;
261 block_group_cache
= &info
->block_group_cache
;
266 if (data
== BTRFS_BLOCK_GROUP_MIXED
) {
267 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
270 bit
= BLOCK_GROUP_DATA
;
272 bit
= BLOCK_GROUP_METADATA
;
275 struct btrfs_block_group_cache
*shint
;
276 shint
= btrfs_lookup_block_group(info
, search_start
);
277 if (shint
&& (shint
->data
== data
||
278 shint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
279 used
= btrfs_block_group_used(&shint
->item
);
280 if (used
+ shint
->pinned
<
281 div_factor(shint
->key
.offset
, factor
)) {
286 if (hint
&& (hint
->data
== data
||
287 hint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
288 used
= btrfs_block_group_used(&hint
->item
);
289 if (used
+ hint
->pinned
<
290 div_factor(hint
->key
.offset
, factor
)) {
293 last
= hint
->key
.objectid
+ hint
->key
.offset
;
297 hint_last
= max(hint
->key
.objectid
, search_start
);
299 hint_last
= search_start
;
305 ret
= find_first_extent_bit(block_group_cache
, last
,
310 ret
= get_state_private(block_group_cache
, start
, &ptr
);
314 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
315 last
= cache
->key
.objectid
+ cache
->key
.offset
;
316 used
= btrfs_block_group_used(&cache
->item
);
319 free_check
= cache
->key
.offset
;
321 free_check
= div_factor(cache
->key
.offset
, factor
);
322 if (used
+ cache
->pinned
< free_check
) {
335 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
343 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
344 struct btrfs_root
*root
,
345 u64 bytenr
, u64 num_bytes
)
347 struct btrfs_path
*path
;
349 struct btrfs_key key
;
350 struct extent_buffer
*l
;
351 struct btrfs_extent_item
*item
;
354 WARN_ON(num_bytes
< root
->sectorsize
);
355 path
= btrfs_alloc_path();
359 key
.objectid
= bytenr
;
360 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
361 key
.offset
= num_bytes
;
362 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
371 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
372 refs
= btrfs_extent_refs(l
, item
);
373 btrfs_set_extent_refs(l
, item
, refs
+ 1);
374 btrfs_mark_buffer_dirty(path
->nodes
[0]);
376 btrfs_release_path(root
->fs_info
->extent_root
, path
);
377 btrfs_free_path(path
);
378 finish_current_insert(trans
, root
->fs_info
->extent_root
);
379 del_pending_extents(trans
, root
->fs_info
->extent_root
);
383 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
384 struct btrfs_root
*root
)
386 finish_current_insert(trans
, root
->fs_info
->extent_root
);
387 del_pending_extents(trans
, root
->fs_info
->extent_root
);
391 static int lookup_extent_ref(struct btrfs_trans_handle
*trans
,
392 struct btrfs_root
*root
, u64 bytenr
,
393 u64 num_bytes
, u32
*refs
)
395 struct btrfs_path
*path
;
397 struct btrfs_key key
;
398 struct extent_buffer
*l
;
399 struct btrfs_extent_item
*item
;
401 WARN_ON(num_bytes
< root
->sectorsize
);
402 path
= btrfs_alloc_path();
403 key
.objectid
= bytenr
;
404 key
.offset
= num_bytes
;
405 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
406 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
411 btrfs_print_leaf(root
, path
->nodes
[0]);
412 printk("failed to find block number %Lu\n", bytenr
);
416 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
417 *refs
= btrfs_extent_refs(l
, item
);
419 btrfs_free_path(path
);
423 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
424 struct btrfs_root
*root
)
426 return btrfs_inc_extent_ref(trans
, root
, root
->node
->start
,
430 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
431 struct extent_buffer
*buf
)
435 struct btrfs_key key
;
436 struct btrfs_file_extent_item
*fi
;
446 level
= btrfs_header_level(buf
);
447 nritems
= btrfs_header_nritems(buf
);
448 for (i
= 0; i
< nritems
; i
++) {
451 btrfs_item_key_to_cpu(buf
, &key
, i
);
452 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
454 fi
= btrfs_item_ptr(buf
, i
,
455 struct btrfs_file_extent_item
);
456 if (btrfs_file_extent_type(buf
, fi
) ==
457 BTRFS_FILE_EXTENT_INLINE
)
459 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
460 if (disk_bytenr
== 0)
462 ret
= btrfs_inc_extent_ref(trans
, root
, disk_bytenr
,
463 btrfs_file_extent_disk_num_bytes(buf
, fi
));
469 bytenr
= btrfs_node_blockptr(buf
, i
);
470 ret
= btrfs_inc_extent_ref(trans
, root
, bytenr
,
471 btrfs_level_size(root
, level
- 1));
481 for (i
=0; i
< faili
; i
++) {
484 btrfs_item_key_to_cpu(buf
, &key
, i
);
485 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
487 fi
= btrfs_item_ptr(buf
, i
,
488 struct btrfs_file_extent_item
);
489 if (btrfs_file_extent_type(buf
, fi
) ==
490 BTRFS_FILE_EXTENT_INLINE
)
492 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
493 if (disk_bytenr
== 0)
495 err
= btrfs_free_extent(trans
, root
, disk_bytenr
,
496 btrfs_file_extent_disk_num_bytes(buf
,
500 bytenr
= btrfs_node_blockptr(buf
, i
);
501 err
= btrfs_free_extent(trans
, root
, bytenr
,
502 btrfs_level_size(root
, level
- 1), 0);
509 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
510 struct btrfs_root
*root
,
511 struct btrfs_path
*path
,
512 struct btrfs_block_group_cache
*cache
)
516 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
518 struct extent_buffer
*leaf
;
520 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
525 leaf
= path
->nodes
[0];
526 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
527 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
528 btrfs_mark_buffer_dirty(leaf
);
529 btrfs_release_path(extent_root
, path
);
531 finish_current_insert(trans
, extent_root
);
532 pending_ret
= del_pending_extents(trans
, extent_root
);
541 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
542 struct btrfs_root
*root
)
544 struct extent_map_tree
*block_group_cache
;
545 struct btrfs_block_group_cache
*cache
;
549 struct btrfs_path
*path
;
555 block_group_cache
= &root
->fs_info
->block_group_cache
;
556 path
= btrfs_alloc_path();
561 ret
= find_first_extent_bit(block_group_cache
, last
,
562 &start
, &end
, BLOCK_GROUP_DIRTY
);
567 ret
= get_state_private(block_group_cache
, start
, &ptr
);
571 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
572 err
= write_one_cache_group(trans
, root
,
575 * if we fail to write the cache group, we want
576 * to keep it marked dirty in hopes that a later
583 clear_extent_bits(block_group_cache
, start
, end
,
584 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
586 btrfs_free_path(path
);
590 static int update_block_group(struct btrfs_trans_handle
*trans
,
591 struct btrfs_root
*root
,
592 u64 bytenr
, u64 num_bytes
, int alloc
,
593 int mark_free
, int data
)
595 struct btrfs_block_group_cache
*cache
;
596 struct btrfs_fs_info
*info
= root
->fs_info
;
597 u64 total
= num_bytes
;
604 cache
= btrfs_lookup_block_group(info
, bytenr
);
608 byte_in_group
= bytenr
- cache
->key
.objectid
;
609 WARN_ON(byte_in_group
> cache
->key
.offset
);
610 start
= cache
->key
.objectid
;
611 end
= start
+ cache
->key
.offset
- 1;
612 set_extent_bits(&info
->block_group_cache
, start
, end
,
613 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
615 old_val
= btrfs_block_group_used(&cache
->item
);
616 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
618 if (cache
->data
!= data
&&
619 old_val
< (cache
->key
.offset
>> 1)) {
624 bit_to_clear
= BLOCK_GROUP_METADATA
;
625 bit_to_set
= BLOCK_GROUP_DATA
;
627 ~BTRFS_BLOCK_GROUP_MIXED
;
629 BTRFS_BLOCK_GROUP_DATA
;
631 bit_to_clear
= BLOCK_GROUP_DATA
;
632 bit_to_set
= BLOCK_GROUP_METADATA
;
634 ~BTRFS_BLOCK_GROUP_MIXED
;
636 ~BTRFS_BLOCK_GROUP_DATA
;
638 clear_extent_bits(&info
->block_group_cache
,
639 start
, end
, bit_to_clear
,
641 set_extent_bits(&info
->block_group_cache
,
642 start
, end
, bit_to_set
,
644 } else if (cache
->data
!= data
&&
645 cache
->data
!= BTRFS_BLOCK_GROUP_MIXED
) {
646 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
647 set_extent_bits(&info
->block_group_cache
,
650 BLOCK_GROUP_METADATA
,
653 old_val
+= num_bytes
;
655 old_val
-= num_bytes
;
657 set_extent_dirty(&info
->free_space_cache
,
658 bytenr
, bytenr
+ num_bytes
- 1,
662 btrfs_set_block_group_used(&cache
->item
, old_val
);
668 static int update_pinned_extents(struct btrfs_root
*root
,
669 u64 bytenr
, u64 num
, int pin
)
672 struct btrfs_block_group_cache
*cache
;
673 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
676 set_extent_dirty(&fs_info
->pinned_extents
,
677 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
679 clear_extent_dirty(&fs_info
->pinned_extents
,
680 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
683 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
685 len
= min(num
, cache
->key
.offset
-
686 (bytenr
- cache
->key
.objectid
));
688 cache
->pinned
+= len
;
689 fs_info
->total_pinned
+= len
;
691 cache
->pinned
-= len
;
692 fs_info
->total_pinned
-= len
;
700 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_map_tree
*copy
)
705 struct extent_map_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
709 ret
= find_first_extent_bit(pinned_extents
, last
,
710 &start
, &end
, EXTENT_DIRTY
);
713 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
719 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
720 struct btrfs_root
*root
,
721 struct extent_map_tree
*unpin
)
726 struct extent_map_tree
*free_space_cache
;
727 free_space_cache
= &root
->fs_info
->free_space_cache
;
730 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
734 update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
735 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
736 set_extent_dirty(free_space_cache
, start
, end
, GFP_NOFS
);
741 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
742 btrfs_root
*extent_root
)
744 struct btrfs_key ins
;
745 struct btrfs_extent_item extent_item
;
750 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
752 btrfs_set_stack_extent_refs(&extent_item
, 1);
753 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
754 btrfs_set_stack_extent_owner(&extent_item
,
755 extent_root
->root_key
.objectid
);
758 ret
= find_first_extent_bit(&info
->extent_ins
, 0, &start
,
759 &end
, EXTENT_LOCKED
);
763 ins
.objectid
= start
;
764 ins
.offset
= end
+ 1 - start
;
765 err
= btrfs_insert_item(trans
, extent_root
, &ins
,
766 &extent_item
, sizeof(extent_item
));
767 clear_extent_bits(&info
->extent_ins
, start
, end
, EXTENT_LOCKED
,
773 static int pin_down_bytes(struct btrfs_root
*root
, u64 bytenr
, u32 num_bytes
,
777 struct extent_buffer
*buf
;
780 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
782 if (btrfs_buffer_uptodate(buf
)) {
784 root
->fs_info
->running_transaction
->transid
;
785 if (btrfs_header_generation(buf
) == transid
) {
786 free_extent_buffer(buf
);
790 free_extent_buffer(buf
);
792 update_pinned_extents(root
, bytenr
, num_bytes
, 1);
794 set_extent_bits(&root
->fs_info
->pending_del
,
795 bytenr
, bytenr
+ num_bytes
- 1,
796 EXTENT_LOCKED
, GFP_NOFS
);
803 * remove an extent from the root, returns 0 on success
805 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
806 *root
, u64 bytenr
, u64 num_bytes
, int pin
,
809 struct btrfs_path
*path
;
810 struct btrfs_key key
;
811 struct btrfs_fs_info
*info
= root
->fs_info
;
812 struct btrfs_root
*extent_root
= info
->extent_root
;
813 struct extent_buffer
*leaf
;
815 struct btrfs_extent_item
*ei
;
818 key
.objectid
= bytenr
;
819 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
820 key
.offset
= num_bytes
;
822 path
= btrfs_alloc_path();
826 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
831 leaf
= path
->nodes
[0];
832 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
833 struct btrfs_extent_item
);
834 refs
= btrfs_extent_refs(leaf
, ei
);
837 btrfs_set_extent_refs(leaf
, ei
, refs
);
838 btrfs_mark_buffer_dirty(leaf
);
845 ret
= pin_down_bytes(root
, bytenr
, num_bytes
, 0);
851 /* block accounting for super block */
852 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
853 btrfs_set_super_bytes_used(&info
->super_copy
,
854 super_used
- num_bytes
);
856 /* block accounting for root item */
857 root_used
= btrfs_root_used(&root
->root_item
);
858 btrfs_set_root_used(&root
->root_item
,
859 root_used
- num_bytes
);
861 ret
= btrfs_del_item(trans
, extent_root
, path
);
865 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
869 btrfs_free_path(path
);
870 finish_current_insert(trans
, extent_root
);
875 * find all the blocks marked as pending in the radix tree and remove
876 * them from the extent map
878 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
879 btrfs_root
*extent_root
)
885 struct extent_map_tree
*pending_del
;
886 struct extent_map_tree
*pinned_extents
;
888 pending_del
= &extent_root
->fs_info
->pending_del
;
889 pinned_extents
= &extent_root
->fs_info
->pinned_extents
;
892 ret
= find_first_extent_bit(pending_del
, 0, &start
, &end
,
896 update_pinned_extents(extent_root
, start
, end
+ 1 - start
, 1);
897 clear_extent_bits(pending_del
, start
, end
, EXTENT_LOCKED
,
899 ret
= __free_extent(trans
, extent_root
,
900 start
, end
+ 1 - start
, 0, 0);
908 * remove an extent from the root, returns 0 on success
910 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
911 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
913 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
917 WARN_ON(num_bytes
< root
->sectorsize
);
918 if (root
== extent_root
) {
919 pin_down_bytes(root
, bytenr
, num_bytes
, 1);
922 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, pin
, pin
== 0);
923 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
924 return ret
? ret
: pending_ret
;
928 * walks the btree of allocated extents and find a hole of a given size.
929 * The key ins is changed to record the hole:
930 * ins->objectid == block start
931 * ins->flags = BTRFS_EXTENT_ITEM_KEY
932 * ins->offset == number of blocks
933 * Any available blocks before search_start are skipped.
935 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
936 *orig_root
, u64 num_bytes
, u64 empty_size
,
937 u64 search_start
, u64 search_end
, u64 hint_byte
,
938 struct btrfs_key
*ins
, u64 exclude_start
,
939 u64 exclude_nr
, int data
)
941 struct btrfs_path
*path
;
942 struct btrfs_key key
;
947 u64 orig_search_start
= search_start
;
949 struct extent_buffer
*l
;
950 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
951 struct btrfs_fs_info
*info
= root
->fs_info
;
952 u64 total_needed
= num_bytes
;
954 struct btrfs_block_group_cache
*block_group
;
959 WARN_ON(num_bytes
< root
->sectorsize
);
960 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
962 level
= btrfs_header_level(root
->node
);
964 if (num_bytes
>= 32 * 1024 * 1024 && hint_byte
) {
965 data
= BTRFS_BLOCK_GROUP_MIXED
;
968 if (search_end
== (u64
)-1)
969 search_end
= btrfs_super_total_bytes(&info
->super_copy
);
971 block_group
= btrfs_lookup_block_group(info
, hint_byte
);
972 block_group
= btrfs_find_block_group(root
, block_group
,
975 block_group
= btrfs_find_block_group(root
,
976 trans
->block_group
, 0,
980 total_needed
+= empty_size
;
981 path
= btrfs_alloc_path();
983 search_start
= find_search_start(root
, &block_group
, search_start
,
984 total_needed
, data
, full_scan
);
985 cached_start
= search_start
;
986 btrfs_init_path(path
);
987 ins
->objectid
= search_start
;
992 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
996 if (path
->slots
[0] > 0) {
1001 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1004 * a rare case, go back one key if we hit a block group item
1005 * instead of an extent item
1007 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
&&
1008 key
.objectid
+ key
.offset
>= search_start
) {
1009 ins
->objectid
= key
.objectid
;
1010 ins
->offset
= key
.offset
- 1;
1011 btrfs_release_path(root
, path
);
1012 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1016 if (path
->slots
[0] > 0) {
1023 slot
= path
->slots
[0];
1024 if (slot
>= btrfs_header_nritems(l
)) {
1025 ret
= btrfs_next_leaf(root
, path
);
1031 search_start
= max(search_start
,
1032 block_group
->key
.objectid
);
1034 ins
->objectid
= search_start
;
1035 ins
->offset
= search_end
- search_start
;
1039 ins
->objectid
= last_byte
> search_start
?
1040 last_byte
: search_start
;
1041 ins
->offset
= search_end
- ins
->objectid
;
1042 BUG_ON(ins
->objectid
>= search_end
);
1045 btrfs_item_key_to_cpu(l
, &key
, slot
);
1047 if (key
.objectid
>= search_start
&& key
.objectid
> last_byte
&&
1049 if (last_byte
< search_start
)
1050 last_byte
= search_start
;
1051 hole_size
= key
.objectid
- last_byte
;
1052 if (hole_size
>= num_bytes
) {
1053 ins
->objectid
= last_byte
;
1054 ins
->offset
= hole_size
;
1058 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
) {
1060 last_byte
= key
.objectid
;
1068 last_byte
= key
.objectid
+ key
.offset
;
1070 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1071 last_byte
>= block_group
->key
.objectid
+
1072 block_group
->key
.offset
) {
1073 btrfs_release_path(root
, path
);
1074 search_start
= block_group
->key
.objectid
+
1075 block_group
->key
.offset
;
1083 /* we have to make sure we didn't find an extent that has already
1084 * been allocated by the map tree or the original allocation
1086 btrfs_release_path(root
, path
);
1087 BUG_ON(ins
->objectid
< search_start
);
1089 if (ins
->objectid
+ num_bytes
>= search_end
)
1091 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1092 ins
->objectid
+ num_bytes
> block_group
->
1093 key
.objectid
+ block_group
->key
.offset
) {
1094 search_start
= block_group
->key
.objectid
+
1095 block_group
->key
.offset
;
1098 if (test_range_bit(&info
->extent_ins
, ins
->objectid
,
1099 ins
->objectid
+ num_bytes
-1, EXTENT_LOCKED
, 0)) {
1100 search_start
= ins
->objectid
+ num_bytes
;
1103 if (test_range_bit(&info
->pinned_extents
, ins
->objectid
,
1104 ins
->objectid
+ num_bytes
-1, EXTENT_DIRTY
, 0)) {
1105 search_start
= ins
->objectid
+ num_bytes
;
1108 if (exclude_nr
> 0 && (ins
->objectid
+ num_bytes
> exclude_start
&&
1109 ins
->objectid
< exclude_start
+ exclude_nr
)) {
1110 search_start
= exclude_start
+ exclude_nr
;
1114 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1116 trans
->block_group
= block_group
;
1118 ins
->offset
= num_bytes
;
1119 btrfs_free_path(path
);
1123 if (search_start
+ num_bytes
>= search_end
) {
1125 search_start
= orig_search_start
;
1132 total_needed
-= empty_size
;
1137 block_group
= btrfs_lookup_block_group(info
, search_start
);
1140 block_group
= btrfs_find_block_group(root
, block_group
,
1141 search_start
, data
, 0);
1145 btrfs_release_path(root
, path
);
1146 btrfs_free_path(path
);
1150 * finds a free extent and does all the dirty work required for allocation
1151 * returns the key for the extent through ins, and a tree buffer for
1152 * the first block of the extent through buf.
1154 * returns 0 if everything worked, non-zero otherwise.
1156 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1157 struct btrfs_root
*root
, u64 owner
,
1158 u64 num_bytes
, u64 empty_size
, u64 hint_byte
,
1159 u64 search_end
, struct btrfs_key
*ins
, int data
)
1163 u64 super_used
, root_used
;
1164 u64 search_start
= 0;
1165 struct btrfs_fs_info
*info
= root
->fs_info
;
1166 struct btrfs_root
*extent_root
= info
->extent_root
;
1167 struct btrfs_extent_item extent_item
;
1169 btrfs_set_stack_extent_refs(&extent_item
, 1);
1170 btrfs_set_stack_extent_owner(&extent_item
, owner
);
1172 WARN_ON(num_bytes
< root
->sectorsize
);
1173 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
1174 search_start
, search_end
, hint_byte
, ins
,
1175 trans
->alloc_exclude_start
,
1176 trans
->alloc_exclude_nr
, data
);
1181 /* block accounting for super block */
1182 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1183 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
1185 /* block accounting for root item */
1186 root_used
= btrfs_root_used(&root
->root_item
);
1187 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
1189 clear_extent_dirty(&root
->fs_info
->free_space_cache
,
1190 ins
->objectid
, ins
->objectid
+ ins
->offset
- 1,
1193 if (root
== extent_root
) {
1194 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
1195 ins
->objectid
+ ins
->offset
- 1,
1196 EXTENT_LOCKED
, GFP_NOFS
);
1201 WARN_ON(trans
->alloc_exclude_nr
);
1202 trans
->alloc_exclude_start
= ins
->objectid
;
1203 trans
->alloc_exclude_nr
= ins
->offset
;
1204 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1205 sizeof(extent_item
));
1207 trans
->alloc_exclude_start
= 0;
1208 trans
->alloc_exclude_nr
= 0;
1211 finish_current_insert(trans
, extent_root
);
1212 pending_ret
= del_pending_extents(trans
, extent_root
);
1222 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
1229 * helper function to allocate a block for a given tree
1230 * returns the tree buffer or NULL.
1232 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1233 struct btrfs_root
*root
,
1234 u32 blocksize
, u64 hint
,
1237 struct btrfs_key ins
;
1239 struct extent_buffer
*buf
;
1241 ret
= btrfs_alloc_extent(trans
, root
, root
->root_key
.objectid
,
1242 blocksize
, empty_size
, hint
,
1246 return ERR_PTR(ret
);
1248 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
, blocksize
);
1250 btrfs_free_extent(trans
, root
, ins
.objectid
, blocksize
, 0);
1251 return ERR_PTR(-ENOMEM
);
1253 btrfs_set_buffer_uptodate(buf
);
1254 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
1255 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
1256 set_extent_bits(&BTRFS_I(root
->fs_info
->btree_inode
)->extent_tree
,
1257 buf
->start
, buf
->start
+ buf
->len
- 1,
1258 EXTENT_CSUM
, GFP_NOFS
);
1259 buf
->flags
|= EXTENT_CSUM
;
1260 btrfs_set_buffer_defrag(buf
);
1261 trans
->blocks_used
++;
1265 static int drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1266 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
1268 struct btrfs_key key
;
1269 struct btrfs_file_extent_item
*fi
;
1274 BUG_ON(!btrfs_is_leaf(leaf
));
1275 nritems
= btrfs_header_nritems(leaf
);
1276 for (i
= 0; i
< nritems
; i
++) {
1279 btrfs_item_key_to_cpu(leaf
, &key
, i
);
1280 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1282 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1283 if (btrfs_file_extent_type(leaf
, fi
) ==
1284 BTRFS_FILE_EXTENT_INLINE
)
1287 * FIXME make sure to insert a trans record that
1288 * repeats the snapshot del on crash
1290 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
1291 if (disk_bytenr
== 0)
1293 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
,
1294 btrfs_file_extent_disk_num_bytes(leaf
, fi
), 0);
1300 static void reada_walk_down(struct btrfs_root
*root
,
1301 struct extent_buffer
*node
)
1311 nritems
= btrfs_header_nritems(node
);
1312 level
= btrfs_header_level(node
);
1313 for (i
= 0; i
< nritems
; i
++) {
1314 bytenr
= btrfs_node_blockptr(node
, i
);
1315 blocksize
= btrfs_level_size(root
, level
- 1);
1316 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
, &refs
);
1320 mutex_unlock(&root
->fs_info
->fs_mutex
);
1321 ret
= readahead_tree_block(root
, bytenr
, blocksize
);
1323 mutex_lock(&root
->fs_info
->fs_mutex
);
1330 * helper function for drop_snapshot, this walks down the tree dropping ref
1331 * counts as it goes.
1333 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1334 *root
, struct btrfs_path
*path
, int *level
)
1336 struct extent_buffer
*next
;
1337 struct extent_buffer
*cur
;
1343 WARN_ON(*level
< 0);
1344 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1345 ret
= lookup_extent_ref(trans
, root
,
1346 path
->nodes
[*level
]->start
,
1347 path
->nodes
[*level
]->len
, &refs
);
1353 * walk down to the last node level and free all the leaves
1355 while(*level
>= 0) {
1356 WARN_ON(*level
< 0);
1357 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1358 cur
= path
->nodes
[*level
];
1360 if (*level
> 0 && path
->slots
[*level
] == 0)
1361 reada_walk_down(root
, cur
);
1363 if (btrfs_header_level(cur
) != *level
)
1366 if (path
->slots
[*level
] >=
1367 btrfs_header_nritems(cur
))
1370 ret
= drop_leaf_ref(trans
, root
, cur
);
1374 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1375 blocksize
= btrfs_level_size(root
, *level
- 1);
1376 ret
= lookup_extent_ref(trans
, root
, bytenr
, blocksize
, &refs
);
1379 path
->slots
[*level
]++;
1380 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1385 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1386 if (!next
|| !btrfs_buffer_uptodate(next
)) {
1387 free_extent_buffer(next
);
1388 mutex_unlock(&root
->fs_info
->fs_mutex
);
1389 next
= read_tree_block(root
, bytenr
, blocksize
);
1390 mutex_lock(&root
->fs_info
->fs_mutex
);
1392 /* we dropped the lock, check one more time */
1393 ret
= lookup_extent_ref(trans
, root
, bytenr
,
1397 path
->slots
[*level
]++;
1398 free_extent_buffer(next
);
1399 ret
= btrfs_free_extent(trans
, root
,
1400 bytenr
, blocksize
, 1);
1405 WARN_ON(*level
<= 0);
1406 if (path
->nodes
[*level
-1])
1407 free_extent_buffer(path
->nodes
[*level
-1]);
1408 path
->nodes
[*level
-1] = next
;
1409 *level
= btrfs_header_level(next
);
1410 path
->slots
[*level
] = 0;
1413 WARN_ON(*level
< 0);
1414 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1415 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->start
,
1416 path
->nodes
[*level
]->len
, 1);
1417 free_extent_buffer(path
->nodes
[*level
]);
1418 path
->nodes
[*level
] = NULL
;
1425 * helper for dropping snapshots. This walks back up the tree in the path
1426 * to find the first node higher up where we haven't yet gone through
1429 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1430 *root
, struct btrfs_path
*path
, int *level
)
1435 struct btrfs_root_item
*root_item
= &root
->root_item
;
1437 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1438 slot
= path
->slots
[i
];
1439 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
1440 struct extent_buffer
*node
;
1441 struct btrfs_disk_key disk_key
;
1442 node
= path
->nodes
[i
];
1445 WARN_ON(*level
== 0);
1446 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
1447 memcpy(&root_item
->drop_progress
,
1448 &disk_key
, sizeof(disk_key
));
1449 root_item
->drop_level
= i
;
1452 ret
= btrfs_free_extent(trans
, root
,
1453 path
->nodes
[*level
]->start
,
1454 path
->nodes
[*level
]->len
, 1);
1456 free_extent_buffer(path
->nodes
[*level
]);
1457 path
->nodes
[*level
] = NULL
;
1465 * drop the reference count on the tree rooted at 'snap'. This traverses
1466 * the tree freeing any blocks that have a ref count of zero after being
1469 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1475 struct btrfs_path
*path
;
1478 struct btrfs_root_item
*root_item
= &root
->root_item
;
1480 path
= btrfs_alloc_path();
1483 level
= btrfs_header_level(root
->node
);
1485 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
1486 path
->nodes
[level
] = root
->node
;
1487 extent_buffer_get(root
->node
);
1488 path
->slots
[level
] = 0;
1490 struct btrfs_key key
;
1491 struct btrfs_disk_key found_key
;
1492 struct extent_buffer
*node
;
1494 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
1495 level
= root_item
->drop_level
;
1496 path
->lowest_level
= level
;
1497 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
1502 node
= path
->nodes
[level
];
1503 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
1504 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
1505 sizeof(found_key
)));
1508 wret
= walk_down_tree(trans
, root
, path
, &level
);
1514 wret
= walk_up_tree(trans
, root
, path
, &level
);
1522 for (i
= 0; i
<= orig_level
; i
++) {
1523 if (path
->nodes
[i
]) {
1524 free_extent_buffer(path
->nodes
[i
]);
1525 path
->nodes
[i
] = NULL
;
1529 btrfs_free_path(path
);
1533 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
1540 ret
= find_first_extent_bit(&info
->block_group_cache
, 0,
1541 &start
, &end
, (unsigned int)-1);
1544 ret
= get_state_private(&info
->block_group_cache
, start
, &ptr
);
1546 kfree((void *)(unsigned long)ptr
);
1547 clear_extent_bits(&info
->block_group_cache
, start
,
1548 end
, (unsigned int)-1, GFP_NOFS
);
1551 ret
= find_first_extent_bit(&info
->free_space_cache
, 0,
1552 &start
, &end
, EXTENT_DIRTY
);
1555 clear_extent_dirty(&info
->free_space_cache
, start
,
1561 int btrfs_read_block_groups(struct btrfs_root
*root
)
1563 struct btrfs_path
*path
;
1567 struct btrfs_block_group_cache
*cache
;
1568 struct btrfs_fs_info
*info
= root
->fs_info
;
1569 struct extent_map_tree
*block_group_cache
;
1570 struct btrfs_key key
;
1571 struct btrfs_key found_key
;
1572 struct extent_buffer
*leaf
;
1574 block_group_cache
= &info
->block_group_cache
;
1576 root
= info
->extent_root
;
1578 key
.offset
= BTRFS_BLOCK_GROUP_SIZE
;
1579 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
1581 path
= btrfs_alloc_path();
1586 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
1592 leaf
= path
->nodes
[0];
1593 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1594 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
1600 read_extent_buffer(leaf
, &cache
->item
,
1601 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
1602 sizeof(cache
->item
));
1603 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
1606 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
1607 btrfs_release_path(root
, path
);
1609 if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_MIXED
) {
1610 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
1611 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
1612 } else if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_DATA
) {
1613 bit
= BLOCK_GROUP_DATA
;
1614 cache
->data
= BTRFS_BLOCK_GROUP_DATA
;
1616 bit
= BLOCK_GROUP_METADATA
;
1620 /* use EXTENT_LOCKED to prevent merging */
1621 set_extent_bits(block_group_cache
, found_key
.objectid
,
1622 found_key
.objectid
+ found_key
.offset
- 1,
1623 bit
| EXTENT_LOCKED
, GFP_NOFS
);
1624 set_state_private(block_group_cache
, found_key
.objectid
,
1625 (unsigned long)cache
);
1628 btrfs_super_total_bytes(&info
->super_copy
))
1632 btrfs_free_path(path
);