2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/sched.h>
22 #include "print-tree.h"
23 #include "transaction.h"
25 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
26 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
27 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
29 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
30 btrfs_root
*extent_root
);
31 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
32 btrfs_root
*extent_root
);
34 static int cache_block_group(struct btrfs_root
*root
,
35 struct btrfs_block_group_cache
*block_group
)
37 struct btrfs_path
*path
;
40 struct extent_buffer
*leaf
;
41 struct extent_map_tree
*free_space_cache
;
48 root
= root
->fs_info
->extent_root
;
49 free_space_cache
= &root
->fs_info
->free_space_cache
;
51 if (block_group
->cached
)
54 path
= btrfs_alloc_path();
59 first_free
= block_group
->key
.objectid
;
60 key
.objectid
= block_group
->key
.objectid
;
63 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
64 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
69 if (ret
&& path
->slots
[0] > 0)
73 leaf
= path
->nodes
[0];
74 slot
= path
->slots
[0];
75 if (slot
>= btrfs_header_nritems(leaf
)) {
76 ret
= btrfs_next_leaf(root
, path
);
86 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
87 if (key
.objectid
< block_group
->key
.objectid
) {
88 if (key
.objectid
+ key
.offset
> first_free
)
89 first_free
= key
.objectid
+ key
.offset
;
93 if (key
.objectid
>= block_group
->key
.objectid
+
94 block_group
->key
.offset
) {
98 if (btrfs_key_type(&key
) == BTRFS_EXTENT_ITEM_KEY
) {
103 if (key
.objectid
> last
) {
104 hole_size
= key
.objectid
- last
;
105 set_extent_dirty(free_space_cache
, last
,
106 last
+ hole_size
- 1,
109 last
= key
.objectid
+ key
.offset
;
117 if (block_group
->key
.objectid
+
118 block_group
->key
.offset
> last
) {
119 hole_size
= block_group
->key
.objectid
+
120 block_group
->key
.offset
- last
;
121 set_extent_dirty(free_space_cache
, last
,
122 last
+ hole_size
- 1, GFP_NOFS
);
124 block_group
->cached
= 1;
126 btrfs_free_path(path
);
130 struct btrfs_block_group_cache
*btrfs_lookup_block_group(struct
134 struct extent_map_tree
*block_group_cache
;
135 struct btrfs_block_group_cache
*block_group
= NULL
;
141 block_group_cache
= &info
->block_group_cache
;
142 ret
= find_first_extent_bit(block_group_cache
,
143 bytenr
, &start
, &end
,
144 BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
);
148 ret
= get_state_private(block_group_cache
, start
, &ptr
);
152 block_group
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
155 if (block_group
->key
.objectid
<= bytenr
&& bytenr
<=
156 block_group
->key
.objectid
+ block_group
->key
.offset
)
162 static u64
find_search_start(struct btrfs_root
*root
,
163 struct btrfs_block_group_cache
**cache_ret
,
164 u64 search_start
, int num
, int data
)
167 struct btrfs_block_group_cache
*cache
= *cache_ret
;
175 ret
= cache_block_group(root
, cache
);
179 last
= max(search_start
, cache
->key
.objectid
);
182 ret
= find_first_extent_bit(&root
->fs_info
->free_space_cache
,
183 last
, &start
, &end
, EXTENT_DIRTY
);
190 start
= max(last
, start
);
192 if (last
- start
< num
) {
193 if (last
== cache
->key
.objectid
+ cache
->key
.offset
)
197 if (data
!= BTRFS_BLOCK_GROUP_MIXED
&&
198 start
+ num
>= cache
->key
.objectid
+ cache
->key
.offset
)
206 last
= cache
->key
.objectid
+ cache
->key
.offset
;
208 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
213 data
= BTRFS_BLOCK_GROUP_MIXED
;
218 if (cache_miss
&& !cache
->cached
) {
219 cache_block_group(root
, cache
);
222 cache
= btrfs_lookup_block_group(root
->fs_info
, last
);
224 cache
= btrfs_find_block_group(root
, cache
, last
, data
, 0);
230 static u64
div_factor(u64 num
, int factor
)
239 struct btrfs_block_group_cache
*btrfs_find_block_group(struct btrfs_root
*root
,
240 struct btrfs_block_group_cache
241 *hint
, u64 search_start
,
244 struct btrfs_block_group_cache
*cache
;
245 struct extent_map_tree
*block_group_cache
;
246 struct btrfs_block_group_cache
*found_group
= NULL
;
247 struct btrfs_fs_info
*info
= root
->fs_info
;
261 block_group_cache
= &info
->block_group_cache
;
266 if (data
== BTRFS_BLOCK_GROUP_MIXED
) {
267 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
270 bit
= BLOCK_GROUP_DATA
;
272 bit
= BLOCK_GROUP_METADATA
;
275 struct btrfs_block_group_cache
*shint
;
276 shint
= btrfs_lookup_block_group(info
, search_start
);
277 if (shint
&& (shint
->data
== data
||
278 shint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
279 used
= btrfs_block_group_used(&shint
->item
);
280 if (used
+ shint
->pinned
<
281 div_factor(shint
->key
.offset
, factor
)) {
286 if (hint
&& (hint
->data
== data
||
287 hint
->data
== BTRFS_BLOCK_GROUP_MIXED
)) {
288 used
= btrfs_block_group_used(&hint
->item
);
289 if (used
+ hint
->pinned
<
290 div_factor(hint
->key
.offset
, factor
)) {
293 last
= hint
->key
.objectid
+ hint
->key
.offset
;
297 hint_last
= max(hint
->key
.objectid
, search_start
);
299 hint_last
= search_start
;
305 ret
= find_first_extent_bit(block_group_cache
, last
,
310 ret
= get_state_private(block_group_cache
, start
, &ptr
);
314 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
315 last
= cache
->key
.objectid
+ cache
->key
.offset
;
316 used
= btrfs_block_group_used(&cache
->item
);
319 free_check
= cache
->key
.offset
;
321 free_check
= div_factor(cache
->key
.offset
, factor
);
322 if (used
+ cache
->pinned
< free_check
) {
335 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
343 int btrfs_inc_extent_ref(struct btrfs_trans_handle
*trans
,
344 struct btrfs_root
*root
,
345 u64 bytenr
, u64 num_bytes
)
347 struct btrfs_path
*path
;
349 struct btrfs_key key
;
350 struct extent_buffer
*l
;
351 struct btrfs_extent_item
*item
;
354 WARN_ON(num_bytes
< root
->sectorsize
);
355 path
= btrfs_alloc_path();
359 key
.objectid
= bytenr
;
360 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
361 key
.offset
= num_bytes
;
362 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
371 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
372 refs
= btrfs_extent_refs(l
, item
);
373 btrfs_set_extent_refs(l
, item
, refs
+ 1);
374 btrfs_mark_buffer_dirty(path
->nodes
[0]);
376 btrfs_release_path(root
->fs_info
->extent_root
, path
);
377 btrfs_free_path(path
);
378 finish_current_insert(trans
, root
->fs_info
->extent_root
);
379 del_pending_extents(trans
, root
->fs_info
->extent_root
);
383 int btrfs_extent_post_op(struct btrfs_trans_handle
*trans
,
384 struct btrfs_root
*root
)
386 finish_current_insert(trans
, root
->fs_info
->extent_root
);
387 del_pending_extents(trans
, root
->fs_info
->extent_root
);
391 static int lookup_extent_ref(struct btrfs_trans_handle
*trans
,
392 struct btrfs_root
*root
, u64 bytenr
,
393 u64 num_bytes
, u32
*refs
)
395 struct btrfs_path
*path
;
397 struct btrfs_key key
;
398 struct extent_buffer
*l
;
399 struct btrfs_extent_item
*item
;
401 WARN_ON(num_bytes
< root
->sectorsize
);
402 path
= btrfs_alloc_path();
403 key
.objectid
= bytenr
;
404 key
.offset
= num_bytes
;
405 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
406 ret
= btrfs_search_slot(trans
, root
->fs_info
->extent_root
, &key
, path
,
411 btrfs_print_leaf(root
, path
->nodes
[0]);
412 printk("failed to find block number %Lu\n", bytenr
);
416 item
= btrfs_item_ptr(l
, path
->slots
[0], struct btrfs_extent_item
);
417 *refs
= btrfs_extent_refs(l
, item
);
419 btrfs_free_path(path
);
423 int btrfs_inc_root_ref(struct btrfs_trans_handle
*trans
,
424 struct btrfs_root
*root
)
426 return btrfs_inc_extent_ref(trans
, root
, root
->node
->start
,
430 int btrfs_inc_ref(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
431 struct extent_buffer
*buf
)
435 struct btrfs_key key
;
436 struct btrfs_file_extent_item
*fi
;
446 level
= btrfs_header_level(buf
);
447 nritems
= btrfs_header_nritems(buf
);
448 for (i
= 0; i
< nritems
; i
++) {
451 btrfs_item_key_to_cpu(buf
, &key
, i
);
452 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
454 fi
= btrfs_item_ptr(buf
, i
,
455 struct btrfs_file_extent_item
);
456 if (btrfs_file_extent_type(buf
, fi
) ==
457 BTRFS_FILE_EXTENT_INLINE
)
459 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
460 if (disk_bytenr
== 0)
462 ret
= btrfs_inc_extent_ref(trans
, root
, disk_bytenr
,
463 btrfs_file_extent_disk_num_bytes(buf
, fi
));
469 bytenr
= btrfs_node_blockptr(buf
, i
);
470 ret
= btrfs_inc_extent_ref(trans
, root
, bytenr
,
471 btrfs_level_size(root
, level
- 1));
481 for (i
=0; i
< faili
; i
++) {
484 btrfs_item_key_to_cpu(buf
, &key
, i
);
485 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
487 fi
= btrfs_item_ptr(buf
, i
,
488 struct btrfs_file_extent_item
);
489 if (btrfs_file_extent_type(buf
, fi
) ==
490 BTRFS_FILE_EXTENT_INLINE
)
492 disk_bytenr
= btrfs_file_extent_disk_bytenr(buf
, fi
);
493 if (disk_bytenr
== 0)
495 err
= btrfs_free_extent(trans
, root
, disk_bytenr
,
496 btrfs_file_extent_disk_num_bytes(buf
,
500 bytenr
= btrfs_node_blockptr(buf
, i
);
501 err
= btrfs_free_extent(trans
, root
, bytenr
,
502 btrfs_level_size(root
, level
- 1), 0);
509 static int write_one_cache_group(struct btrfs_trans_handle
*trans
,
510 struct btrfs_root
*root
,
511 struct btrfs_path
*path
,
512 struct btrfs_block_group_cache
*cache
)
516 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
518 struct extent_buffer
*leaf
;
520 ret
= btrfs_search_slot(trans
, extent_root
, &cache
->key
, path
, 0, 1);
525 leaf
= path
->nodes
[0];
526 bi
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
527 write_extent_buffer(leaf
, &cache
->item
, bi
, sizeof(cache
->item
));
528 btrfs_mark_buffer_dirty(leaf
);
529 btrfs_release_path(extent_root
, path
);
531 finish_current_insert(trans
, extent_root
);
532 pending_ret
= del_pending_extents(trans
, extent_root
);
541 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle
*trans
,
542 struct btrfs_root
*root
)
544 struct extent_map_tree
*block_group_cache
;
545 struct btrfs_block_group_cache
*cache
;
549 struct btrfs_path
*path
;
555 block_group_cache
= &root
->fs_info
->block_group_cache
;
556 path
= btrfs_alloc_path();
561 ret
= find_first_extent_bit(block_group_cache
, last
,
562 &start
, &end
, BLOCK_GROUP_DIRTY
);
567 ret
= get_state_private(block_group_cache
, start
, &ptr
);
571 cache
= (struct btrfs_block_group_cache
*)(unsigned long)ptr
;
572 err
= write_one_cache_group(trans
, root
,
575 * if we fail to write the cache group, we want
576 * to keep it marked dirty in hopes that a later
583 clear_extent_bits(block_group_cache
, start
, end
,
584 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
586 btrfs_free_path(path
);
590 static int update_block_group(struct btrfs_trans_handle
*trans
,
591 struct btrfs_root
*root
,
592 u64 bytenr
, u64 num_bytes
, int alloc
,
593 int mark_free
, int data
)
595 struct btrfs_block_group_cache
*cache
;
596 struct btrfs_fs_info
*info
= root
->fs_info
;
597 u64 total
= num_bytes
;
604 cache
= btrfs_lookup_block_group(info
, bytenr
);
608 byte_in_group
= bytenr
- cache
->key
.objectid
;
609 WARN_ON(byte_in_group
> cache
->key
.offset
);
610 start
= cache
->key
.objectid
;
611 end
= start
+ cache
->key
.offset
- 1;
612 set_extent_bits(&info
->block_group_cache
, start
, end
,
613 BLOCK_GROUP_DIRTY
, GFP_NOFS
);
615 old_val
= btrfs_block_group_used(&cache
->item
);
616 num_bytes
= min(total
, cache
->key
.offset
- byte_in_group
);
618 if (cache
->data
!= data
&&
619 old_val
< (cache
->key
.offset
>> 1)) {
624 bit_to_clear
= BLOCK_GROUP_METADATA
;
625 bit_to_set
= BLOCK_GROUP_DATA
;
627 ~BTRFS_BLOCK_GROUP_MIXED
;
629 BTRFS_BLOCK_GROUP_DATA
;
631 bit_to_clear
= BLOCK_GROUP_DATA
;
632 bit_to_set
= BLOCK_GROUP_METADATA
;
634 ~BTRFS_BLOCK_GROUP_MIXED
;
636 ~BTRFS_BLOCK_GROUP_DATA
;
638 clear_extent_bits(&info
->block_group_cache
,
639 start
, end
, bit_to_clear
,
641 set_extent_bits(&info
->block_group_cache
,
642 start
, end
, bit_to_set
,
644 } else if (cache
->data
!= data
&&
645 cache
->data
!= BTRFS_BLOCK_GROUP_MIXED
) {
646 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
647 set_extent_bits(&info
->block_group_cache
,
650 BLOCK_GROUP_METADATA
,
653 old_val
+= num_bytes
;
655 old_val
-= num_bytes
;
657 set_extent_dirty(&info
->free_space_cache
,
658 bytenr
, bytenr
+ num_bytes
- 1,
662 btrfs_set_block_group_used(&cache
->item
, old_val
);
668 static int update_pinned_extents(struct btrfs_root
*root
,
669 u64 bytenr
, u64 num
, int pin
)
672 struct btrfs_block_group_cache
*cache
;
673 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
676 set_extent_dirty(&fs_info
->pinned_extents
,
677 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
679 clear_extent_dirty(&fs_info
->pinned_extents
,
680 bytenr
, bytenr
+ num
- 1, GFP_NOFS
);
683 cache
= btrfs_lookup_block_group(fs_info
, bytenr
);
685 len
= min(num
, cache
->key
.offset
-
686 (bytenr
- cache
->key
.objectid
));
688 cache
->pinned
+= len
;
689 fs_info
->total_pinned
+= len
;
691 cache
->pinned
-= len
;
692 fs_info
->total_pinned
-= len
;
700 int btrfs_copy_pinned(struct btrfs_root
*root
, struct extent_map_tree
*copy
)
705 struct extent_map_tree
*pinned_extents
= &root
->fs_info
->pinned_extents
;
709 ret
= find_first_extent_bit(pinned_extents
, last
,
710 &start
, &end
, EXTENT_DIRTY
);
713 set_extent_dirty(copy
, start
, end
, GFP_NOFS
);
719 int btrfs_finish_extent_commit(struct btrfs_trans_handle
*trans
,
720 struct btrfs_root
*root
,
721 struct extent_map_tree
*unpin
)
726 struct extent_map_tree
*free_space_cache
;
727 free_space_cache
= &root
->fs_info
->free_space_cache
;
730 ret
= find_first_extent_bit(unpin
, 0, &start
, &end
,
734 update_pinned_extents(root
, start
, end
+ 1 - start
, 0);
735 clear_extent_dirty(unpin
, start
, end
, GFP_NOFS
);
736 set_extent_dirty(free_space_cache
, start
, end
, GFP_NOFS
);
741 static int finish_current_insert(struct btrfs_trans_handle
*trans
, struct
742 btrfs_root
*extent_root
)
744 struct btrfs_key ins
;
745 struct btrfs_extent_item extent_item
;
750 struct btrfs_fs_info
*info
= extent_root
->fs_info
;
752 btrfs_set_stack_extent_refs(&extent_item
, 1);
753 btrfs_set_key_type(&ins
, BTRFS_EXTENT_ITEM_KEY
);
754 btrfs_set_stack_extent_owner(&extent_item
,
755 extent_root
->root_key
.objectid
);
758 ret
= find_first_extent_bit(&info
->extent_ins
, 0, &start
,
759 &end
, EXTENT_LOCKED
);
763 ins
.objectid
= start
;
764 ins
.offset
= end
+ 1 - start
;
765 err
= btrfs_insert_item(trans
, extent_root
, &ins
,
766 &extent_item
, sizeof(extent_item
));
767 clear_extent_bits(&info
->extent_ins
, start
, end
, EXTENT_LOCKED
,
773 static int pin_down_bytes(struct btrfs_root
*root
, u64 bytenr
, u32 num_bytes
,
777 struct extent_buffer
*buf
;
780 buf
= btrfs_find_tree_block(root
, bytenr
, num_bytes
);
782 if (btrfs_buffer_uptodate(buf
)) {
784 root
->fs_info
->running_transaction
->transid
;
785 if (btrfs_header_generation(buf
) == transid
) {
786 free_extent_buffer(buf
);
790 free_extent_buffer(buf
);
792 update_pinned_extents(root
, bytenr
, num_bytes
, 1);
794 set_extent_bits(&root
->fs_info
->pending_del
,
795 bytenr
, bytenr
+ num_bytes
- 1,
796 EXTENT_LOCKED
, GFP_NOFS
);
803 * remove an extent from the root, returns 0 on success
805 static int __free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
806 *root
, u64 bytenr
, u64 num_bytes
, int pin
,
809 struct btrfs_path
*path
;
810 struct btrfs_key key
;
811 struct btrfs_fs_info
*info
= root
->fs_info
;
812 struct btrfs_root
*extent_root
= info
->extent_root
;
813 struct extent_buffer
*leaf
;
815 struct btrfs_extent_item
*ei
;
818 key
.objectid
= bytenr
;
819 btrfs_set_key_type(&key
, BTRFS_EXTENT_ITEM_KEY
);
820 key
.offset
= num_bytes
;
822 path
= btrfs_alloc_path();
826 ret
= btrfs_search_slot(trans
, extent_root
, &key
, path
, -1, 1);
831 leaf
= path
->nodes
[0];
832 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
833 struct btrfs_extent_item
);
834 refs
= btrfs_extent_refs(leaf
, ei
);
837 btrfs_set_extent_refs(leaf
, ei
, refs
);
838 btrfs_mark_buffer_dirty(leaf
);
845 ret
= pin_down_bytes(root
, bytenr
, num_bytes
, 0);
851 /* block accounting for super block */
852 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
853 btrfs_set_super_bytes_used(&info
->super_copy
,
854 super_used
- num_bytes
);
856 /* block accounting for root item */
857 root_used
= btrfs_root_used(&root
->root_item
);
858 btrfs_set_root_used(&root
->root_item
,
859 root_used
- num_bytes
);
861 ret
= btrfs_del_item(trans
, extent_root
, path
);
865 ret
= update_block_group(trans
, root
, bytenr
, num_bytes
, 0,
869 btrfs_free_path(path
);
870 finish_current_insert(trans
, extent_root
);
875 * find all the blocks marked as pending in the radix tree and remove
876 * them from the extent map
878 static int del_pending_extents(struct btrfs_trans_handle
*trans
, struct
879 btrfs_root
*extent_root
)
885 struct extent_map_tree
*pending_del
;
886 struct extent_map_tree
*pinned_extents
;
888 pending_del
= &extent_root
->fs_info
->pending_del
;
889 pinned_extents
= &extent_root
->fs_info
->pinned_extents
;
892 ret
= find_first_extent_bit(pending_del
, 0, &start
, &end
,
896 update_pinned_extents(extent_root
, start
, end
+ 1 - start
, 1);
897 clear_extent_bits(pending_del
, start
, end
, EXTENT_LOCKED
,
899 ret
= __free_extent(trans
, extent_root
,
900 start
, end
+ 1 - start
, 0, 0);
908 * remove an extent from the root, returns 0 on success
910 int btrfs_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
911 *root
, u64 bytenr
, u64 num_bytes
, int pin
)
913 struct btrfs_root
*extent_root
= root
->fs_info
->extent_root
;
917 WARN_ON(num_bytes
< root
->sectorsize
);
918 if (root
== extent_root
) {
919 pin_down_bytes(root
, bytenr
, num_bytes
, 1);
922 ret
= __free_extent(trans
, root
, bytenr
, num_bytes
, pin
, pin
== 0);
923 pending_ret
= del_pending_extents(trans
, root
->fs_info
->extent_root
);
924 return ret
? ret
: pending_ret
;
928 * walks the btree of allocated extents and find a hole of a given size.
929 * The key ins is changed to record the hole:
930 * ins->objectid == block start
931 * ins->flags = BTRFS_EXTENT_ITEM_KEY
932 * ins->offset == number of blocks
933 * Any available blocks before search_start are skipped.
935 static int find_free_extent(struct btrfs_trans_handle
*trans
, struct btrfs_root
936 *orig_root
, u64 num_bytes
, u64 empty_size
,
937 u64 search_start
, u64 search_end
, u64 hint_byte
,
938 struct btrfs_key
*ins
, u64 exclude_start
,
939 u64 exclude_nr
, int data
)
941 struct btrfs_path
*path
;
942 struct btrfs_key key
;
947 u64 orig_search_start
= search_start
;
949 struct extent_buffer
*l
;
950 struct btrfs_root
* root
= orig_root
->fs_info
->extent_root
;
951 struct btrfs_fs_info
*info
= root
->fs_info
;
952 u64 total_needed
= num_bytes
;
954 struct btrfs_block_group_cache
*block_group
;
959 WARN_ON(num_bytes
< root
->sectorsize
);
960 btrfs_set_key_type(ins
, BTRFS_EXTENT_ITEM_KEY
);
962 level
= btrfs_header_level(root
->node
);
964 if (num_bytes
>= 96 * 1024 * 1024 && hint_byte
) {
965 data
= BTRFS_BLOCK_GROUP_MIXED
;
968 if (search_end
== (u64
)-1)
969 search_end
= btrfs_super_total_bytes(&info
->super_copy
);
971 block_group
= btrfs_lookup_block_group(info
, hint_byte
);
972 block_group
= btrfs_find_block_group(root
, block_group
,
975 block_group
= btrfs_find_block_group(root
,
976 trans
->block_group
, 0,
980 total_needed
+= empty_size
;
981 path
= btrfs_alloc_path();
984 search_start
= find_search_start(root
, &block_group
,
985 search_start
, total_needed
, data
);
986 cached_start
= search_start
;
988 btrfs_init_path(path
);
989 ins
->objectid
= search_start
;
994 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
998 if (path
->slots
[0] > 0) {
1003 btrfs_item_key_to_cpu(l
, &key
, path
->slots
[0]);
1006 * a rare case, go back one key if we hit a block group item
1007 * instead of an extent item
1009 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
&&
1010 key
.objectid
+ key
.offset
>= search_start
) {
1011 ins
->objectid
= key
.objectid
;
1012 ins
->offset
= key
.offset
- 1;
1013 btrfs_release_path(root
, path
);
1014 ret
= btrfs_search_slot(trans
, root
, ins
, path
, 0, 0);
1018 if (path
->slots
[0] > 0) {
1025 slot
= path
->slots
[0];
1026 if (slot
>= btrfs_header_nritems(l
)) {
1027 ret
= btrfs_next_leaf(root
, path
);
1033 search_start
= max(search_start
,
1034 block_group
->key
.objectid
);
1036 ins
->objectid
= search_start
;
1037 ins
->offset
= search_end
- search_start
;
1041 ins
->objectid
= last_byte
> search_start
?
1042 last_byte
: search_start
;
1043 ins
->offset
= search_end
- ins
->objectid
;
1044 BUG_ON(ins
->objectid
>= search_end
);
1047 btrfs_item_key_to_cpu(l
, &key
, slot
);
1049 if (key
.objectid
>= search_start
&& key
.objectid
> last_byte
&&
1051 if (last_byte
< search_start
)
1052 last_byte
= search_start
;
1053 hole_size
= key
.objectid
- last_byte
;
1054 if (hole_size
>= num_bytes
) {
1055 ins
->objectid
= last_byte
;
1056 ins
->offset
= hole_size
;
1060 if (btrfs_key_type(&key
) != BTRFS_EXTENT_ITEM_KEY
) {
1062 last_byte
= key
.objectid
;
1070 last_byte
= key
.objectid
+ key
.offset
;
1072 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1073 last_byte
>= block_group
->key
.objectid
+
1074 block_group
->key
.offset
) {
1075 btrfs_release_path(root
, path
);
1076 search_start
= block_group
->key
.objectid
+
1077 block_group
->key
.offset
;
1085 /* we have to make sure we didn't find an extent that has already
1086 * been allocated by the map tree or the original allocation
1088 btrfs_release_path(root
, path
);
1089 BUG_ON(ins
->objectid
< search_start
);
1091 if (ins
->objectid
+ num_bytes
>= search_end
)
1094 if (!full_scan
&& data
!= BTRFS_BLOCK_GROUP_MIXED
&&
1095 ins
->objectid
+ num_bytes
>= block_group
->
1096 key
.objectid
+ block_group
->key
.offset
) {
1097 search_start
= block_group
->key
.objectid
+
1098 block_group
->key
.offset
;
1101 if (test_range_bit(&info
->extent_ins
, ins
->objectid
,
1102 ins
->objectid
+ num_bytes
-1, EXTENT_LOCKED
, 0)) {
1103 search_start
= ins
->objectid
+ num_bytes
;
1106 if (test_range_bit(&info
->pinned_extents
, ins
->objectid
,
1107 ins
->objectid
+ num_bytes
-1, EXTENT_DIRTY
, 0)) {
1108 search_start
= ins
->objectid
+ num_bytes
;
1111 if (exclude_nr
> 0 && (ins
->objectid
+ num_bytes
> exclude_start
&&
1112 ins
->objectid
< exclude_start
+ exclude_nr
)) {
1113 search_start
= exclude_start
+ exclude_nr
;
1117 block_group
= btrfs_lookup_block_group(info
, ins
->objectid
);
1119 trans
->block_group
= block_group
;
1121 ins
->offset
= num_bytes
;
1122 btrfs_free_path(path
);
1126 if (search_start
+ num_bytes
>= search_end
) {
1128 search_start
= orig_search_start
;
1135 total_needed
-= empty_size
;
1140 block_group
= btrfs_lookup_block_group(info
, search_start
);
1143 block_group
= btrfs_find_block_group(root
, block_group
,
1144 search_start
, data
, 0);
1148 btrfs_release_path(root
, path
);
1149 btrfs_free_path(path
);
1153 * finds a free extent and does all the dirty work required for allocation
1154 * returns the key for the extent through ins, and a tree buffer for
1155 * the first block of the extent through buf.
1157 * returns 0 if everything worked, non-zero otherwise.
1159 int btrfs_alloc_extent(struct btrfs_trans_handle
*trans
,
1160 struct btrfs_root
*root
, u64 owner
,
1161 u64 num_bytes
, u64 empty_size
, u64 hint_byte
,
1162 u64 search_end
, struct btrfs_key
*ins
, int data
)
1166 u64 super_used
, root_used
;
1167 u64 search_start
= 0;
1168 struct btrfs_fs_info
*info
= root
->fs_info
;
1169 struct btrfs_root
*extent_root
= info
->extent_root
;
1170 struct btrfs_extent_item extent_item
;
1172 btrfs_set_stack_extent_refs(&extent_item
, 1);
1173 btrfs_set_stack_extent_owner(&extent_item
, owner
);
1175 WARN_ON(num_bytes
< root
->sectorsize
);
1176 ret
= find_free_extent(trans
, root
, num_bytes
, empty_size
,
1177 search_start
, search_end
, hint_byte
, ins
,
1178 trans
->alloc_exclude_start
,
1179 trans
->alloc_exclude_nr
, data
);
1184 /* block accounting for super block */
1185 super_used
= btrfs_super_bytes_used(&info
->super_copy
);
1186 btrfs_set_super_bytes_used(&info
->super_copy
, super_used
+ num_bytes
);
1188 /* block accounting for root item */
1189 root_used
= btrfs_root_used(&root
->root_item
);
1190 btrfs_set_root_used(&root
->root_item
, root_used
+ num_bytes
);
1192 clear_extent_dirty(&root
->fs_info
->free_space_cache
,
1193 ins
->objectid
, ins
->objectid
+ ins
->offset
- 1,
1196 if (root
== extent_root
) {
1197 set_extent_bits(&root
->fs_info
->extent_ins
, ins
->objectid
,
1198 ins
->objectid
+ ins
->offset
- 1,
1199 EXTENT_LOCKED
, GFP_NOFS
);
1204 WARN_ON(trans
->alloc_exclude_nr
);
1205 trans
->alloc_exclude_start
= ins
->objectid
;
1206 trans
->alloc_exclude_nr
= ins
->offset
;
1207 ret
= btrfs_insert_item(trans
, extent_root
, ins
, &extent_item
,
1208 sizeof(extent_item
));
1210 trans
->alloc_exclude_start
= 0;
1211 trans
->alloc_exclude_nr
= 0;
1214 finish_current_insert(trans
, extent_root
);
1215 pending_ret
= del_pending_extents(trans
, extent_root
);
1225 ret
= update_block_group(trans
, root
, ins
->objectid
, ins
->offset
, 1, 0,
1232 * helper function to allocate a block for a given tree
1233 * returns the tree buffer or NULL.
1235 struct extent_buffer
*btrfs_alloc_free_block(struct btrfs_trans_handle
*trans
,
1236 struct btrfs_root
*root
,
1237 u32 blocksize
, u64 hint
,
1240 struct btrfs_key ins
;
1242 struct extent_buffer
*buf
;
1244 ret
= btrfs_alloc_extent(trans
, root
, root
->root_key
.objectid
,
1245 blocksize
, empty_size
, hint
,
1249 return ERR_PTR(ret
);
1251 buf
= btrfs_find_create_tree_block(root
, ins
.objectid
, blocksize
);
1253 btrfs_free_extent(trans
, root
, ins
.objectid
, blocksize
, 0);
1254 return ERR_PTR(-ENOMEM
);
1256 btrfs_set_buffer_uptodate(buf
);
1257 set_extent_dirty(&trans
->transaction
->dirty_pages
, buf
->start
,
1258 buf
->start
+ buf
->len
- 1, GFP_NOFS
);
1259 set_extent_bits(&BTRFS_I(root
->fs_info
->btree_inode
)->extent_tree
,
1260 buf
->start
, buf
->start
+ buf
->len
- 1,
1261 EXTENT_CSUM
, GFP_NOFS
);
1262 buf
->flags
|= EXTENT_CSUM
;
1263 btrfs_set_buffer_defrag(buf
);
1264 trans
->blocks_used
++;
1268 static int drop_leaf_ref(struct btrfs_trans_handle
*trans
,
1269 struct btrfs_root
*root
, struct extent_buffer
*leaf
)
1271 struct btrfs_key key
;
1272 struct btrfs_file_extent_item
*fi
;
1277 BUG_ON(!btrfs_is_leaf(leaf
));
1278 nritems
= btrfs_header_nritems(leaf
);
1279 for (i
= 0; i
< nritems
; i
++) {
1282 btrfs_item_key_to_cpu(leaf
, &key
, i
);
1283 if (btrfs_key_type(&key
) != BTRFS_EXTENT_DATA_KEY
)
1285 fi
= btrfs_item_ptr(leaf
, i
, struct btrfs_file_extent_item
);
1286 if (btrfs_file_extent_type(leaf
, fi
) ==
1287 BTRFS_FILE_EXTENT_INLINE
)
1290 * FIXME make sure to insert a trans record that
1291 * repeats the snapshot del on crash
1293 disk_bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
1294 if (disk_bytenr
== 0)
1296 ret
= btrfs_free_extent(trans
, root
, disk_bytenr
,
1297 btrfs_file_extent_disk_num_bytes(leaf
, fi
), 0);
1303 static void reada_walk_down(struct btrfs_root
*root
,
1304 struct extent_buffer
*node
)
1314 nritems
= btrfs_header_nritems(node
);
1315 level
= btrfs_header_level(node
);
1316 for (i
= 0; i
< nritems
; i
++) {
1317 bytenr
= btrfs_node_blockptr(node
, i
);
1318 blocksize
= btrfs_level_size(root
, level
- 1);
1319 ret
= lookup_extent_ref(NULL
, root
, bytenr
, blocksize
, &refs
);
1323 mutex_unlock(&root
->fs_info
->fs_mutex
);
1324 ret
= readahead_tree_block(root
, bytenr
, blocksize
);
1326 mutex_lock(&root
->fs_info
->fs_mutex
);
1333 * helper function for drop_snapshot, this walks down the tree dropping ref
1334 * counts as it goes.
1336 static int walk_down_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1337 *root
, struct btrfs_path
*path
, int *level
)
1339 struct extent_buffer
*next
;
1340 struct extent_buffer
*cur
;
1346 WARN_ON(*level
< 0);
1347 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1348 ret
= lookup_extent_ref(trans
, root
,
1349 path
->nodes
[*level
]->start
,
1350 path
->nodes
[*level
]->len
, &refs
);
1356 * walk down to the last node level and free all the leaves
1358 while(*level
>= 0) {
1359 WARN_ON(*level
< 0);
1360 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1361 cur
= path
->nodes
[*level
];
1363 if (*level
> 0 && path
->slots
[*level
] == 0)
1364 reada_walk_down(root
, cur
);
1366 if (btrfs_header_level(cur
) != *level
)
1369 if (path
->slots
[*level
] >=
1370 btrfs_header_nritems(cur
))
1373 ret
= drop_leaf_ref(trans
, root
, cur
);
1377 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1378 blocksize
= btrfs_level_size(root
, *level
- 1);
1379 ret
= lookup_extent_ref(trans
, root
, bytenr
, blocksize
, &refs
);
1382 path
->slots
[*level
]++;
1383 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1388 next
= btrfs_find_tree_block(root
, bytenr
, blocksize
);
1389 if (!next
|| !btrfs_buffer_uptodate(next
)) {
1390 free_extent_buffer(next
);
1391 mutex_unlock(&root
->fs_info
->fs_mutex
);
1392 next
= read_tree_block(root
, bytenr
, blocksize
);
1393 mutex_lock(&root
->fs_info
->fs_mutex
);
1395 /* we dropped the lock, check one more time */
1396 ret
= lookup_extent_ref(trans
, root
, bytenr
,
1400 path
->slots
[*level
]++;
1401 free_extent_buffer(next
);
1402 ret
= btrfs_free_extent(trans
, root
,
1403 bytenr
, blocksize
, 1);
1408 WARN_ON(*level
<= 0);
1409 if (path
->nodes
[*level
-1])
1410 free_extent_buffer(path
->nodes
[*level
-1]);
1411 path
->nodes
[*level
-1] = next
;
1412 *level
= btrfs_header_level(next
);
1413 path
->slots
[*level
] = 0;
1416 WARN_ON(*level
< 0);
1417 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1418 ret
= btrfs_free_extent(trans
, root
, path
->nodes
[*level
]->start
,
1419 path
->nodes
[*level
]->len
, 1);
1420 free_extent_buffer(path
->nodes
[*level
]);
1421 path
->nodes
[*level
] = NULL
;
1428 * helper for dropping snapshots. This walks back up the tree in the path
1429 * to find the first node higher up where we haven't yet gone through
1432 static int walk_up_tree(struct btrfs_trans_handle
*trans
, struct btrfs_root
1433 *root
, struct btrfs_path
*path
, int *level
)
1438 struct btrfs_root_item
*root_item
= &root
->root_item
;
1440 for(i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1441 slot
= path
->slots
[i
];
1442 if (slot
< btrfs_header_nritems(path
->nodes
[i
]) - 1) {
1443 struct extent_buffer
*node
;
1444 struct btrfs_disk_key disk_key
;
1445 node
= path
->nodes
[i
];
1448 WARN_ON(*level
== 0);
1449 btrfs_node_key(node
, &disk_key
, path
->slots
[i
]);
1450 memcpy(&root_item
->drop_progress
,
1451 &disk_key
, sizeof(disk_key
));
1452 root_item
->drop_level
= i
;
1455 ret
= btrfs_free_extent(trans
, root
,
1456 path
->nodes
[*level
]->start
,
1457 path
->nodes
[*level
]->len
, 1);
1459 free_extent_buffer(path
->nodes
[*level
]);
1460 path
->nodes
[*level
] = NULL
;
1468 * drop the reference count on the tree rooted at 'snap'. This traverses
1469 * the tree freeing any blocks that have a ref count of zero after being
1472 int btrfs_drop_snapshot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1478 struct btrfs_path
*path
;
1481 struct btrfs_root_item
*root_item
= &root
->root_item
;
1483 path
= btrfs_alloc_path();
1486 level
= btrfs_header_level(root
->node
);
1488 if (btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
1489 path
->nodes
[level
] = root
->node
;
1490 extent_buffer_get(root
->node
);
1491 path
->slots
[level
] = 0;
1493 struct btrfs_key key
;
1494 struct btrfs_disk_key found_key
;
1495 struct extent_buffer
*node
;
1497 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
1498 level
= root_item
->drop_level
;
1499 path
->lowest_level
= level
;
1500 wret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
1505 node
= path
->nodes
[level
];
1506 btrfs_node_key(node
, &found_key
, path
->slots
[level
]);
1507 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
1508 sizeof(found_key
)));
1511 wret
= walk_down_tree(trans
, root
, path
, &level
);
1517 wret
= walk_up_tree(trans
, root
, path
, &level
);
1525 for (i
= 0; i
<= orig_level
; i
++) {
1526 if (path
->nodes
[i
]) {
1527 free_extent_buffer(path
->nodes
[i
]);
1528 path
->nodes
[i
] = NULL
;
1532 btrfs_free_path(path
);
1536 int btrfs_free_block_groups(struct btrfs_fs_info
*info
)
1543 ret
= find_first_extent_bit(&info
->block_group_cache
, 0,
1544 &start
, &end
, (unsigned int)-1);
1547 ret
= get_state_private(&info
->block_group_cache
, start
, &ptr
);
1549 kfree((void *)(unsigned long)ptr
);
1550 clear_extent_bits(&info
->block_group_cache
, start
,
1551 end
, (unsigned int)-1, GFP_NOFS
);
1554 ret
= find_first_extent_bit(&info
->free_space_cache
, 0,
1555 &start
, &end
, EXTENT_DIRTY
);
1558 clear_extent_dirty(&info
->free_space_cache
, start
,
1564 int btrfs_read_block_groups(struct btrfs_root
*root
)
1566 struct btrfs_path
*path
;
1570 struct btrfs_block_group_cache
*cache
;
1571 struct btrfs_fs_info
*info
= root
->fs_info
;
1572 struct extent_map_tree
*block_group_cache
;
1573 struct btrfs_key key
;
1574 struct btrfs_key found_key
;
1575 struct extent_buffer
*leaf
;
1577 block_group_cache
= &info
->block_group_cache
;
1579 root
= info
->extent_root
;
1581 key
.offset
= BTRFS_BLOCK_GROUP_SIZE
;
1582 btrfs_set_key_type(&key
, BTRFS_BLOCK_GROUP_ITEM_KEY
);
1584 path
= btrfs_alloc_path();
1589 ret
= btrfs_search_slot(NULL
, info
->extent_root
,
1595 leaf
= path
->nodes
[0];
1596 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
1597 cache
= kmalloc(sizeof(*cache
), GFP_NOFS
);
1603 read_extent_buffer(leaf
, &cache
->item
,
1604 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
1605 sizeof(cache
->item
));
1606 memcpy(&cache
->key
, &found_key
, sizeof(found_key
));
1609 key
.objectid
= found_key
.objectid
+ found_key
.offset
;
1610 btrfs_release_path(root
, path
);
1612 if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_MIXED
) {
1613 bit
= BLOCK_GROUP_DATA
| BLOCK_GROUP_METADATA
;
1614 cache
->data
= BTRFS_BLOCK_GROUP_MIXED
;
1615 } else if (cache
->item
.flags
& BTRFS_BLOCK_GROUP_DATA
) {
1616 bit
= BLOCK_GROUP_DATA
;
1617 cache
->data
= BTRFS_BLOCK_GROUP_DATA
;
1619 bit
= BLOCK_GROUP_METADATA
;
1623 /* use EXTENT_LOCKED to prevent merging */
1624 set_extent_bits(block_group_cache
, found_key
.objectid
,
1625 found_key
.objectid
+ found_key
.offset
- 1,
1626 bit
| EXTENT_LOCKED
, GFP_NOFS
);
1627 set_state_private(block_group_cache
, found_key
.objectid
,
1628 (unsigned long)cache
);
1631 btrfs_super_total_bytes(&info
->super_copy
))
1635 btrfs_free_path(path
);