/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>
/* slab caches for extent tree / extent node objects */
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
24 static struct extent_node
*__attach_extent_node(struct f2fs_sb_info
*sbi
,
25 struct extent_tree
*et
, struct extent_info
*ei
,
26 struct rb_node
*parent
, struct rb_node
**p
)
28 struct extent_node
*en
;
30 en
= kmem_cache_alloc(extent_node_slab
, GFP_ATOMIC
);
35 INIT_LIST_HEAD(&en
->list
);
37 rb_link_node(&en
->rb_node
, parent
, p
);
38 rb_insert_color(&en
->rb_node
, &et
->root
);
40 atomic_inc(&sbi
->total_ext_node
);
44 static void __detach_extent_node(struct f2fs_sb_info
*sbi
,
45 struct extent_tree
*et
, struct extent_node
*en
)
47 rb_erase(&en
->rb_node
, &et
->root
);
49 atomic_dec(&sbi
->total_ext_node
);
51 if (et
->cached_en
== en
)
55 static struct extent_tree
*__grab_extent_tree(struct inode
*inode
)
57 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
58 struct extent_tree
*et
;
59 nid_t ino
= inode
->i_ino
;
61 down_write(&sbi
->extent_tree_lock
);
62 et
= radix_tree_lookup(&sbi
->extent_tree_root
, ino
);
64 et
= f2fs_kmem_cache_alloc(extent_tree_slab
, GFP_NOFS
);
65 f2fs_radix_tree_insert(&sbi
->extent_tree_root
, ino
, et
);
66 memset(et
, 0, sizeof(struct extent_tree
));
70 rwlock_init(&et
->lock
);
71 atomic_set(&et
->refcount
, 0);
73 sbi
->total_ext_tree
++;
75 atomic_inc(&et
->refcount
);
76 up_write(&sbi
->extent_tree_lock
);
78 /* never died until evict_inode */
79 F2FS_I(inode
)->extent_tree
= et
;
84 static struct extent_node
*__lookup_extent_tree(struct f2fs_sb_info
*sbi
,
85 struct extent_tree
*et
, unsigned int fofs
)
87 struct rb_node
*node
= et
->root
.rb_node
;
88 struct extent_node
*en
= et
->cached_en
;
91 struct extent_info
*cei
= &en
->ei
;
93 if (cei
->fofs
<= fofs
&& cei
->fofs
+ cei
->len
> fofs
) {
94 stat_inc_cached_node_hit(sbi
);
100 en
= rb_entry(node
, struct extent_node
, rb_node
);
102 if (fofs
< en
->ei
.fofs
) {
103 node
= node
->rb_left
;
104 } else if (fofs
>= en
->ei
.fofs
+ en
->ei
.len
) {
105 node
= node
->rb_right
;
107 stat_inc_rbtree_node_hit(sbi
);
114 static struct extent_node
*__init_extent_tree(struct f2fs_sb_info
*sbi
,
115 struct extent_tree
*et
, struct extent_info
*ei
)
117 struct rb_node
**p
= &et
->root
.rb_node
;
118 struct extent_node
*en
;
120 en
= __attach_extent_node(sbi
, et
, ei
, NULL
, p
);
124 et
->largest
= en
->ei
;
129 static unsigned int __free_extent_tree(struct f2fs_sb_info
*sbi
,
130 struct extent_tree
*et
, bool free_all
)
132 struct rb_node
*node
, *next
;
133 struct extent_node
*en
;
134 unsigned int count
= et
->count
;
136 node
= rb_first(&et
->root
);
138 next
= rb_next(node
);
139 en
= rb_entry(node
, struct extent_node
, rb_node
);
142 spin_lock(&sbi
->extent_lock
);
143 if (!list_empty(&en
->list
))
144 list_del_init(&en
->list
);
145 spin_unlock(&sbi
->extent_lock
);
148 if (free_all
|| list_empty(&en
->list
)) {
149 __detach_extent_node(sbi
, et
, en
);
150 kmem_cache_free(extent_node_slab
, en
);
155 return count
- et
->count
;
158 static void __drop_largest_extent(struct inode
*inode
, pgoff_t fofs
)
160 struct extent_info
*largest
= &F2FS_I(inode
)->extent_tree
->largest
;
162 if (largest
->fofs
<= fofs
&& largest
->fofs
+ largest
->len
> fofs
)
166 void f2fs_drop_largest_extent(struct inode
*inode
, pgoff_t fofs
)
168 if (!f2fs_may_extent_tree(inode
))
171 __drop_largest_extent(inode
, fofs
);
174 void f2fs_init_extent_tree(struct inode
*inode
, struct f2fs_extent
*i_ext
)
176 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
177 struct extent_tree
*et
;
178 struct extent_node
*en
;
179 struct extent_info ei
;
181 if (!f2fs_may_extent_tree(inode
))
184 et
= __grab_extent_tree(inode
);
186 if (!i_ext
|| le32_to_cpu(i_ext
->len
) < F2FS_MIN_EXTENT_LEN
)
189 set_extent_info(&ei
, le32_to_cpu(i_ext
->fofs
),
190 le32_to_cpu(i_ext
->blk
), le32_to_cpu(i_ext
->len
));
192 write_lock(&et
->lock
);
196 en
= __init_extent_tree(sbi
, et
, &ei
);
198 spin_lock(&sbi
->extent_lock
);
199 list_add_tail(&en
->list
, &sbi
->extent_list
);
200 spin_unlock(&sbi
->extent_lock
);
203 write_unlock(&et
->lock
);
206 static bool f2fs_lookup_extent_tree(struct inode
*inode
, pgoff_t pgofs
,
207 struct extent_info
*ei
)
209 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
210 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
211 struct extent_node
*en
;
214 f2fs_bug_on(sbi
, !et
);
216 trace_f2fs_lookup_extent_tree_start(inode
, pgofs
);
218 read_lock(&et
->lock
);
220 if (et
->largest
.fofs
<= pgofs
&&
221 et
->largest
.fofs
+ et
->largest
.len
> pgofs
) {
224 stat_inc_largest_node_hit(sbi
);
228 en
= __lookup_extent_tree(sbi
, et
, pgofs
);
231 spin_lock(&sbi
->extent_lock
);
232 if (!list_empty(&en
->list
))
233 list_move_tail(&en
->list
, &sbi
->extent_list
);
235 spin_unlock(&sbi
->extent_lock
);
239 stat_inc_total_hit(sbi
);
240 read_unlock(&et
->lock
);
242 trace_f2fs_lookup_extent_tree_end(inode
, pgofs
, ei
);
248 * lookup extent at @fofs, if hit, return the extent
249 * if not, return NULL and
250 * @prev_ex: extent before fofs
251 * @next_ex: extent after fofs
252 * @insert_p: insert point for new extent at fofs
253 * in order to simpfy the insertion after.
254 * tree must stay unchanged between lookup and insertion.
256 static struct extent_node
*__lookup_extent_tree_ret(struct extent_tree
*et
,
258 struct extent_node
**prev_ex
,
259 struct extent_node
**next_ex
,
260 struct rb_node
***insert_p
,
261 struct rb_node
**insert_parent
)
263 struct rb_node
**pnode
= &et
->root
.rb_node
;
264 struct rb_node
*parent
= NULL
, *tmp_node
;
265 struct extent_node
*en
= et
->cached_en
;
268 *insert_parent
= NULL
;
272 if (RB_EMPTY_ROOT(&et
->root
))
276 struct extent_info
*cei
= &en
->ei
;
278 if (cei
->fofs
<= fofs
&& cei
->fofs
+ cei
->len
> fofs
)
279 goto lookup_neighbors
;
284 en
= rb_entry(*pnode
, struct extent_node
, rb_node
);
286 if (fofs
< en
->ei
.fofs
)
287 pnode
= &(*pnode
)->rb_left
;
288 else if (fofs
>= en
->ei
.fofs
+ en
->ei
.len
)
289 pnode
= &(*pnode
)->rb_right
;
291 goto lookup_neighbors
;
295 *insert_parent
= parent
;
297 en
= rb_entry(parent
, struct extent_node
, rb_node
);
299 if (parent
&& fofs
> en
->ei
.fofs
)
300 tmp_node
= rb_next(parent
);
301 *next_ex
= tmp_node
?
302 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
305 if (parent
&& fofs
< en
->ei
.fofs
)
306 tmp_node
= rb_prev(parent
);
307 *prev_ex
= tmp_node
?
308 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
312 if (fofs
== en
->ei
.fofs
) {
313 /* lookup prev node for merging backward later */
314 tmp_node
= rb_prev(&en
->rb_node
);
315 *prev_ex
= tmp_node
?
316 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
318 if (fofs
== en
->ei
.fofs
+ en
->ei
.len
- 1) {
319 /* lookup next node for merging frontward later */
320 tmp_node
= rb_next(&en
->rb_node
);
321 *next_ex
= tmp_node
?
322 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
327 static struct extent_node
*__try_merge_extent_node(struct f2fs_sb_info
*sbi
,
328 struct extent_tree
*et
, struct extent_info
*ei
,
329 struct extent_node
**den
,
330 struct extent_node
*prev_ex
,
331 struct extent_node
*next_ex
)
333 struct extent_node
*en
= NULL
;
335 if (prev_ex
&& __is_back_mergeable(ei
, &prev_ex
->ei
)) {
336 prev_ex
->ei
.len
+= ei
->len
;
341 if (next_ex
&& __is_front_mergeable(ei
, &next_ex
->ei
)) {
343 __detach_extent_node(sbi
, et
, prev_ex
);
346 next_ex
->ei
.fofs
= ei
->fofs
;
347 next_ex
->ei
.blk
= ei
->blk
;
348 next_ex
->ei
.len
+= ei
->len
;
353 if (en
->ei
.len
> et
->largest
.len
)
354 et
->largest
= en
->ei
;
360 static struct extent_node
*__insert_extent_tree(struct f2fs_sb_info
*sbi
,
361 struct extent_tree
*et
, struct extent_info
*ei
,
362 struct rb_node
**insert_p
,
363 struct rb_node
*insert_parent
)
365 struct rb_node
**p
= &et
->root
.rb_node
;
366 struct rb_node
*parent
= NULL
;
367 struct extent_node
*en
= NULL
;
369 if (insert_p
&& insert_parent
) {
370 parent
= insert_parent
;
377 en
= rb_entry(parent
, struct extent_node
, rb_node
);
379 if (ei
->fofs
< en
->ei
.fofs
)
381 else if (ei
->fofs
>= en
->ei
.fofs
+ en
->ei
.len
)
387 en
= __attach_extent_node(sbi
, et
, ei
, parent
, p
);
391 if (en
->ei
.len
> et
->largest
.len
)
392 et
->largest
= en
->ei
;
397 unsigned int f2fs_update_extent_tree_range(struct inode
*inode
,
398 pgoff_t fofs
, block_t blkaddr
, unsigned int len
)
400 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
401 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
402 struct extent_node
*en
= NULL
, *en1
= NULL
, *en2
= NULL
, *en3
= NULL
;
403 struct extent_node
*prev_en
= NULL
, *next_en
= NULL
;
404 struct extent_info ei
, dei
, prev
;
405 struct rb_node
**insert_p
= NULL
, *insert_parent
= NULL
;
406 unsigned int end
= fofs
+ len
;
407 unsigned int pos
= (unsigned int)fofs
;
412 write_lock(&et
->lock
);
414 if (is_inode_flag_set(F2FS_I(inode
), FI_NO_EXTENT
)) {
415 write_unlock(&et
->lock
);
422 /* we do not guarantee that the largest extent is cached all the time */
423 __drop_largest_extent(inode
, fofs
);
425 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
426 en
= __lookup_extent_tree_ret(et
, fofs
, &prev_en
, &next_en
,
427 &insert_p
, &insert_parent
);
431 f2fs_bug_on(sbi
, en
->ei
.fofs
<= pos
);
435 * skip searching in the tree since there is no
436 * larger extent node in the cache.
442 /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
444 struct rb_node
*node
;
452 node
= rb_next(&en
->rb_node
);
455 * 2.1 there are four cases when we invalidate blkaddr in extent
456 * node, |V: valid address, X: will be invalidated|
458 /* case#1, invalidate right part of extent node |VVVVVXXXXX| */
459 if (pos
> dei
.fofs
&& end
>= dei
.fofs
+ dei
.len
) {
460 en
->ei
.len
= pos
- dei
.fofs
;
462 if (en
->ei
.len
< F2FS_MIN_EXTENT_LEN
) {
463 __detach_extent_node(sbi
, et
, en
);
465 insert_parent
= NULL
;
469 if (__is_extent_same(&dei
, &et
->largest
))
470 et
->largest
= en
->ei
;
474 /* case#2, invalidate left part of extent node |XXXXXVVVVV| */
475 if (pos
<= dei
.fofs
&& end
< dei
.fofs
+ dei
.len
) {
477 en
->ei
.blk
+= end
- dei
.fofs
;
478 en
->ei
.len
-= end
- dei
.fofs
;
480 if (en
->ei
.len
< F2FS_MIN_EXTENT_LEN
) {
481 __detach_extent_node(sbi
, et
, en
);
483 insert_parent
= NULL
;
487 if (__is_extent_same(&dei
, &et
->largest
))
488 et
->largest
= en
->ei
;
492 __detach_extent_node(sbi
, et
, en
);
495 * if we remove node in rb-tree, our parent node pointer may
496 * point the wrong place, discard them.
499 insert_parent
= NULL
;
501 /* case#3, invalidate entire extent node |XXXXXXXXXX| */
502 if (pos
<= dei
.fofs
&& end
>= dei
.fofs
+ dei
.len
) {
503 if (__is_extent_same(&dei
, &et
->largest
))
509 * case#4, invalidate data in the middle of extent node
512 if (dei
.len
> F2FS_MIN_EXTENT_LEN
) {
515 /* insert left part of split extent into cache */
516 if (pos
- dei
.fofs
>= F2FS_MIN_EXTENT_LEN
) {
517 set_extent_info(&ei
, dei
.fofs
, dei
.blk
,
519 en1
= __insert_extent_tree(sbi
, et
, &ei
,
523 /* insert right part of split extent into cache */
524 endofs
= dei
.fofs
+ dei
.len
;
525 if (endofs
- end
>= F2FS_MIN_EXTENT_LEN
) {
526 set_extent_info(&ei
, end
,
527 end
- dei
.fofs
+ dei
.blk
,
529 en2
= __insert_extent_tree(sbi
, et
, &ei
,
534 /* 2.2 update in global extent list */
535 spin_lock(&sbi
->extent_lock
);
536 if (en
&& !list_empty(&en
->list
))
539 list_add_tail(&en1
->list
, &sbi
->extent_list
);
541 list_add_tail(&en2
->list
, &sbi
->extent_list
);
542 spin_unlock(&sbi
->extent_lock
);
544 /* 2.3 release extent node */
546 kmem_cache_free(extent_node_slab
, en
);
548 en
= node
? rb_entry(node
, struct extent_node
, rb_node
) : NULL
;
555 /* 3. update extent in extent cache */
557 struct extent_node
*den
= NULL
;
559 set_extent_info(&ei
, fofs
, blkaddr
, len
);
560 en3
= __try_merge_extent_node(sbi
, et
, &ei
, &den
,
563 en3
= __insert_extent_tree(sbi
, et
, &ei
,
564 insert_p
, insert_parent
);
566 /* give up extent_cache, if split and small updates happen */
568 prev
.len
< F2FS_MIN_EXTENT_LEN
&&
569 et
->largest
.len
< F2FS_MIN_EXTENT_LEN
) {
571 set_inode_flag(F2FS_I(inode
), FI_NO_EXTENT
);
574 spin_lock(&sbi
->extent_lock
);
576 if (list_empty(&en3
->list
))
577 list_add_tail(&en3
->list
, &sbi
->extent_list
);
579 list_move_tail(&en3
->list
, &sbi
->extent_list
);
581 if (den
&& !list_empty(&den
->list
))
582 list_del(&den
->list
);
583 spin_unlock(&sbi
->extent_lock
);
586 kmem_cache_free(extent_node_slab
, den
);
589 if (is_inode_flag_set(F2FS_I(inode
), FI_NO_EXTENT
))
590 __free_extent_tree(sbi
, et
, true);
592 write_unlock(&et
->lock
);
594 return !__is_extent_same(&prev
, &et
->largest
);
597 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info
*sbi
, int nr_shrink
)
599 struct extent_tree
*treevec
[EXT_TREE_VEC_SIZE
];
600 struct extent_node
*en
, *tmp
;
601 unsigned long ino
= F2FS_ROOT_INO(sbi
);
602 struct radix_tree_root
*root
= &sbi
->extent_tree_root
;
604 unsigned int node_cnt
= 0, tree_cnt
= 0;
607 if (!test_opt(sbi
, EXTENT_CACHE
))
610 if (!down_write_trylock(&sbi
->extent_tree_lock
))
613 /* 1. remove unreferenced extent tree */
614 while ((found
= radix_tree_gang_lookup(root
,
615 (void **)treevec
, ino
, EXT_TREE_VEC_SIZE
))) {
618 ino
= treevec
[found
- 1]->ino
+ 1;
619 for (i
= 0; i
< found
; i
++) {
620 struct extent_tree
*et
= treevec
[i
];
622 if (!atomic_read(&et
->refcount
)) {
623 write_lock(&et
->lock
);
624 node_cnt
+= __free_extent_tree(sbi
, et
, true);
625 write_unlock(&et
->lock
);
627 radix_tree_delete(root
, et
->ino
);
628 kmem_cache_free(extent_tree_slab
, et
);
629 sbi
->total_ext_tree
--;
632 if (node_cnt
+ tree_cnt
>= nr_shrink
)
637 up_write(&sbi
->extent_tree_lock
);
639 /* 2. remove LRU extent entries */
640 if (!down_write_trylock(&sbi
->extent_tree_lock
))
643 remained
= nr_shrink
- (node_cnt
+ tree_cnt
);
645 spin_lock(&sbi
->extent_lock
);
646 list_for_each_entry_safe(en
, tmp
, &sbi
->extent_list
, list
) {
649 list_del_init(&en
->list
);
651 spin_unlock(&sbi
->extent_lock
);
653 while ((found
= radix_tree_gang_lookup(root
,
654 (void **)treevec
, ino
, EXT_TREE_VEC_SIZE
))) {
657 ino
= treevec
[found
- 1]->ino
+ 1;
658 for (i
= 0; i
< found
; i
++) {
659 struct extent_tree
*et
= treevec
[i
];
661 write_lock(&et
->lock
);
662 node_cnt
+= __free_extent_tree(sbi
, et
, false);
663 write_unlock(&et
->lock
);
665 if (node_cnt
+ tree_cnt
>= nr_shrink
)
670 up_write(&sbi
->extent_tree_lock
);
672 trace_f2fs_shrink_extent_tree(sbi
, node_cnt
, tree_cnt
);
674 return node_cnt
+ tree_cnt
;
677 unsigned int f2fs_destroy_extent_node(struct inode
*inode
)
679 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
680 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
681 unsigned int node_cnt
= 0;
686 write_lock(&et
->lock
);
687 node_cnt
= __free_extent_tree(sbi
, et
, true);
688 write_unlock(&et
->lock
);
693 void f2fs_destroy_extent_tree(struct inode
*inode
)
695 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
696 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
697 unsigned int node_cnt
= 0;
702 if (inode
->i_nlink
&& !is_bad_inode(inode
) && et
->count
) {
703 atomic_dec(&et
->refcount
);
707 /* free all extent info belong to this extent tree */
708 node_cnt
= f2fs_destroy_extent_node(inode
);
710 /* delete extent tree entry in radix tree */
711 down_write(&sbi
->extent_tree_lock
);
712 atomic_dec(&et
->refcount
);
713 f2fs_bug_on(sbi
, atomic_read(&et
->refcount
) || et
->count
);
714 radix_tree_delete(&sbi
->extent_tree_root
, inode
->i_ino
);
715 kmem_cache_free(extent_tree_slab
, et
);
716 sbi
->total_ext_tree
--;
717 up_write(&sbi
->extent_tree_lock
);
719 F2FS_I(inode
)->extent_tree
= NULL
;
721 trace_f2fs_destroy_extent_tree(inode
, node_cnt
);
724 bool f2fs_lookup_extent_cache(struct inode
*inode
, pgoff_t pgofs
,
725 struct extent_info
*ei
)
727 if (!f2fs_may_extent_tree(inode
))
730 return f2fs_lookup_extent_tree(inode
, pgofs
, ei
);
733 void f2fs_update_extent_cache(struct dnode_of_data
*dn
)
735 struct f2fs_inode_info
*fi
= F2FS_I(dn
->inode
);
738 if (!f2fs_may_extent_tree(dn
->inode
))
741 f2fs_bug_on(F2FS_I_SB(dn
->inode
), dn
->data_blkaddr
== NEW_ADDR
);
744 fofs
= start_bidx_of_node(ofs_of_node(dn
->node_page
), fi
) +
747 if (f2fs_update_extent_tree_range(dn
->inode
, fofs
, dn
->data_blkaddr
, 1))
751 void f2fs_update_extent_cache_range(struct dnode_of_data
*dn
,
752 pgoff_t fofs
, block_t blkaddr
, unsigned int len
)
755 if (!f2fs_may_extent_tree(dn
->inode
))
758 if (f2fs_update_extent_tree_range(dn
->inode
, fofs
, blkaddr
, len
))
762 void init_extent_cache_info(struct f2fs_sb_info
*sbi
)
764 INIT_RADIX_TREE(&sbi
->extent_tree_root
, GFP_NOIO
);
765 init_rwsem(&sbi
->extent_tree_lock
);
766 INIT_LIST_HEAD(&sbi
->extent_list
);
767 spin_lock_init(&sbi
->extent_lock
);
768 sbi
->total_ext_tree
= 0;
769 atomic_set(&sbi
->total_ext_node
, 0);
772 int __init
create_extent_cache(void)
774 extent_tree_slab
= f2fs_kmem_cache_create("f2fs_extent_tree",
775 sizeof(struct extent_tree
));
776 if (!extent_tree_slab
)
778 extent_node_slab
= f2fs_kmem_cache_create("f2fs_extent_node",
779 sizeof(struct extent_node
));
780 if (!extent_node_slab
) {
781 kmem_cache_destroy(extent_tree_slab
);
787 void destroy_extent_cache(void)
789 kmem_cache_destroy(extent_node_slab
);
790 kmem_cache_destroy(extent_tree_slab
);